Created
November 3, 2011 10:52
-
-
Save gboudreau/1336243 to your computer and use it in GitHub Desktop.
Greyhole executable - fixed FATAL on BT display
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php | |
<?php | |
/* | |
Copyright 2009-2011 Guillaume Boudreau, Andrew Hopkinson | |
This file is part of Greyhole. | |
Greyhole is free software: you can redistribute it and/or modify | |
it under the terms of the GNU General Public License as published by | |
the Free Software Foundation, either version 3 of the License, or | |
(at your option) any later version. | |
Greyhole is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License for more details. | |
You should have received a copy of the GNU General Public License | |
along with Greyhole. If not, see <http://www.gnu.org/licenses/>. | |
*/ | |
// Log levels, from most verbose (PERF) to most severe (CRITICAL).
// gh_log() drops any message whose level is numerically greater than the
// configured $log_level, and exits the process after logging a CRITICAL one.
define('PERF', 9);
define('TEST', 8);
define('DEBUG', 7);
define('INFO', 6);
define('WARN', 4);
define('ERROR', 3);
define('CRITICAL', 2);

// Name of the action currently being executed; included in every log line.
$action = 'initialize';

// NOTE(review): re-setting the timezone to its own current value looks like the
// usual way to silence PHP's "timezone not set" warnings - confirm.
date_default_timezone_set(date_default_timezone_get());

set_error_handler("gh_error_handler");
register_shutdown_function("gh_shutdown");

umask(0);

setlocale(LC_COLLATE, "en_US.UTF-8");
setlocale(LC_CTYPE, "en_US.UTF-8");

// PHP_VERSION_ID only exists as of PHP 5.2.7; emulate it on older versions.
if (!defined('PHP_VERSION_ID')) {
    $version = explode('.', PHP_VERSION);
    define('PHP_VERSION_ID', ($version[0] * 10000 + $version[1] * 100 + $version[2]));
}

// Build a name => value map of the user-defined constants, so that
// parse_config() can translate a textual log_level (e.g. "DEBUG") into its
// numeric value. Values are copied directly instead of being round-tripped
// through eval(), which mangled values containing quotes or NUL bytes.
if (!isset($_CONSTANTS) || !is_array($_CONSTANTS)) {
    $_CONSTANTS = array();
}
$constarray = get_defined_constants(true);
foreach ($constarray['user'] as $key => $val) {
    $_CONSTANTS[$key] = $val;
}

// Cached df results
$last_df_time = 0;
$last_dfs = array();

$sleep_before_task = array();

// Both paths can be pre-set by the including script; fall back to the defaults otherwise.
if (!isset($config_file)) {
    $config_file = '/etc/greyhole.conf';
}
if (!isset($smb_config_file)) {
    $smb_config_file = '/etc/samba/smb.conf';
}

// Samba share names that are recognized as the Greyhole trash share.
$trash_share_names = array('Greyhole Attic', 'Greyhole Trash', 'Greyhole Recycle Bin');
/**
 * Load Greyhole's settings from $config_file and the share paths from
 * $smb_config_file into global variables.
 *
 * Besides the globals listed in the `global` statement below, every option
 * that is not handled explicitly is exported as a global variable named after
 * the option (e.g. db_engine, db_host, greyhole_log_file).
 *
 * As a side effect, this function declares the database abstraction functions
 * (db_connect, db_query, db_escape_string, db_fetch_object, db_free_result,
 * db_insert_id, db_error) for the configured engine, as well as db_migrate().
 * NOTE: because those are declared unconditionally when this function runs,
 * calling parse_config() a second time in the same process would trigger a
 * "Cannot redeclare" fatal error.
 *
 * @return bool TRUE on success; FALSE when no storage pool drive is defined, or
 *              when a share found in $config_file has no path in $smb_config_file.
 */
function parse_config() {
    global $_CONSTANTS, $storage_pool_drives, $shares_options, $minimum_free_space_pool_drives, $df_command, $config_file, $smb_config_file, $sticky_files, $db_options, $frozen_directories, $trash_share_names, $max_queued_tasks, $memory_limit, $delete_moves_to_trash;

    // FALSE when not inside a dir_selection_groups option; TRUE while reading
    // continuation lines of the global option; a share name (string) while
    // reading continuation lines of a per-share option.
    $parsing_dir_selection_groups = FALSE;
    $shares_options = array();
    $storage_pool_drives = array();
    $frozen_directories = array();
    $config_text = file_get_contents($config_file);
    $delete_moves_to_trash = TRUE;

    foreach (explode("\n", $config_text) as $line) {
        if (preg_match("/^[ \t]*([^=\t]+)[ \t]*=[ \t]*([^#]+)/", $line, $regs)) {
            $name = trim($regs[1]);
            $value = trim($regs[2]);
            if ($name[0] == '#') {
                // Commented-out option
                continue;
            }

            // Deprecated option names are silently mapped to their new names.
            if (mb_strpos($name, 'delete_moves_to_attic') !== FALSE) {
                $new_name = str_replace('attic', 'trash', $name);
                #gh_log(WARN, "Deprecated option found in greyhole.conf: $name. You should change that to: $new_name");
                $name = $new_name;
            }
            if ($name == 'storage_pool_directory') {
                $new_name = 'storage_pool_drive';
                #gh_log(WARN, "Deprecated option found in greyhole.conf: $name. You should change that to: $new_name");
                $name = $new_name;
            }

            $parsing_dir_selection_groups = FALSE;
            switch($name) {
                case 'log_level':
                    // Textual level (e.g. DEBUG) => numeric constant value
                    global ${$name};
                    ${$name} = $_CONSTANTS[$value];
                    break;
                case 'delete_moves_to_trash': // or delete_moves_to_attic
                case 'log_memory_usage':
                case 'balance_modified_files':
                case 'check_for_open_files':
                    // Boolean options: 1, or anything containing yes / true
                    global ${$name};
                    ${$name} = trim($value) === '1' || mb_strpos(strtolower(trim($value)), 'yes') !== FALSE || mb_strpos(strtolower(trim($value)), 'true') !== FALSE;
                    break;
                case 'storage_pool_drive': // or storage_pool_directory
                    if (preg_match("/(.*) ?, ?min_free ?: ?([0-9]+) ?gb?/i", $value, $regs)) {
                        $storage_pool_drives[] = trim($regs[1]);
                        $minimum_free_space_pool_drives[trim($regs[1])] = (float) trim($regs[2]);
                    }
                    break;
                case 'wait_for_exclusive_file_access':
                    $shares = explode(',', str_replace(' ', '', $value));
                    foreach ($shares as $share) {
                        $shares_options[$share]['wait_for_exclusive_file_access'] = TRUE;
                    }
                    break;
                case 'sticky_files':
                    $last_sticky_files_dir = trim($value, '/');
                    $sticky_files[$last_sticky_files_dir] = array();
                    break;
                case 'stick_into':
                    // Applies to the most recently seen sticky_files option
                    $sticky_files[$last_sticky_files_dir][] = '/' . trim($value, '/');
                    break;
                case 'frozen_directory':
                    $frozen_directories[] = trim($value, '/');
                    break;
                case 'memory_limit':
                    ini_set('memory_limit',$value);
                    $memory_limit = $value;
                    break;
                case 'dir_selection_groups':
                    if (preg_match("/(.+):(.+)/", $value, $regs)) {
                        global $dir_selection_groups;
                        $group_name = trim($regs[1]);
                        $dirs = array_map('trim', explode(',', $regs[2]));
                        $dir_selection_groups[$group_name] = $dirs;
                        $parsing_dir_selection_groups = TRUE;
                    }
                    break;
                case 'dir_selection_algorithm':
                    global $dir_selection_algorithm;
                    $dir_selection_algorithm = DirectorySelection::parse($value, @$dir_selection_groups);
                    break;
                default:
                    // Per-share options, written as option_name[ShareName]
                    if (mb_strpos($name, 'num_copies') === 0) {
                        $share = mb_substr($name, 11, mb_strlen($name)-12);
                        if (mb_stripos($value, 'max') === 0) {
                            $value = 9999;
                        }
                        $shares_options[$share]['num_copies'] = (int) $value;
                    } else if (mb_strpos($name, 'delete_moves_to_trash') === 0) {
                        $share = mb_substr($name, 22, mb_strlen($name)-23);
                        $shares_options[$share]['delete_moves_to_trash'] = trim($value) === '1' || mb_strpos(strtolower(trim($value)), 'yes') !== FALSE || mb_strpos(strtolower(trim($value)), 'true') !== FALSE;
                    } else if (mb_strpos($name, 'dir_selection_groups') === 0) {
                        $share = mb_substr($name, 21, mb_strlen($name)-22);
                        if (preg_match("/(.+):(.+)/", $value, $regs)) {
                            $group_name = trim($regs[1]);
                            $dirs = array_map('trim', explode(',', $regs[2]));
                            $shares_options[$share]['dir_selection_groups'][$group_name] = $dirs;
                            $parsing_dir_selection_groups = $share;
                        }
                    } else if (mb_strpos($name, 'dir_selection_algorithm') === 0) {
                        $share = mb_substr($name, 24, mb_strlen($name)-25);
                        if (!isset($shares_options[$share]['dir_selection_groups'])) {
                            $shares_options[$share]['dir_selection_groups'] = @$dir_selection_groups;
                        }
                        $shares_options[$share]['dir_selection_algorithm'] = DirectorySelection::parse($value, $shares_options[$share]['dir_selection_groups']);
                    } else {
                        // Any other option becomes a global variable of the same name.
                        global ${$name};
                        if (is_numeric($value)) {
                            ${$name} = (int) $value;
                        } else {
                            ${$name} = $value;
                        }
                    }
            }
        } else if ($parsing_dir_selection_groups !== FALSE) {
            // Continuation line ("group: dir1, dir2") of a dir_selection_groups option
            $value = trim($line);
            if (strlen($value) == 0 || $value[0] == '#') {
                continue;
            }
            if (preg_match("/(.+):(.+)/", $value, $regs)) {
                $group_name = trim($regs[1]);
                $dirs = array_map('trim', explode(',', $regs[2]));
                if (is_string($parsing_dir_selection_groups)) {
                    $share = $parsing_dir_selection_groups;
                    $shares_options[$share]['dir_selection_groups'][$group_name] = $dirs;
                } else {
                    $dir_selection_groups[$group_name] = $dirs;
                }
            }
        }
    }

    // Build the df command used to query free space on all storage pool drives,
    // and normalize the drive paths (single leading slash, no trailing slash).
    if (is_array($storage_pool_drives) && count($storage_pool_drives) > 0) {
        $df_command = "df -k";
        foreach ($storage_pool_drives as $key => $sp_drive) {
            $df_command .= " " . escapeshellarg($sp_drive);
            $storage_pool_drives[$key] = '/' . trim($sp_drive, '/');
        }
        $df_command .= " 2>&1 | grep '%' | grep -v \"^df: .*: No such file or directory$\"";
    } else {
        gh_log(WARN, "You have no storage_pool_drive defined. Greyhole can't run.");
        return FALSE;
    }

    // Read the landing zone (path) of each known share from the Samba config.
    $config_text = file_get_contents($smb_config_file);
    foreach (explode("\n", $config_text) as $line) {
        $line = trim($line);
        if (mb_strlen($line) == 0) { continue; }
        if ($line[0] == '[' && preg_match('/\[([^\]]+)\]/', $line, $regs)) {
            $share_name = $regs[1];
        }
        // Ignore Samba shares that are neither Greyhole shares nor a trash share.
        if (isset($share_name) && !isset($shares_options[$share_name]) && array_search($share_name, $trash_share_names) === FALSE) { continue; }
        if (isset($share_name) && preg_match('/^\s*path[ \t]*=[ \t]*(.+)$/i', $line, $regs)) {
            $shares_options[$share_name]['landing_zone'] = '/' . trim($regs[1], '/');
            $shares_options[$share_name]['name'] = $share_name;
        }
    }

    global $dir_selection_algorithm;
    if (isset($dir_selection_algorithm)) {
        foreach ($dir_selection_algorithm as $ds) {
            $ds->update();
        }
    } else {
        // Default dir_selection_algorithm
        $dir_selection_algorithm = DirectorySelection::parse('most_available_space', null);
    }

    // Validate and complete the options of each share.
    foreach ($shares_options as $share_name => $share_options) {
        if (array_search($share_name, $trash_share_names) !== FALSE) {
            // The trash share is kept apart from the regular shares.
            global $trash_share;
            $trash_share = array('name' => $share_name, 'landing_zone' => $shares_options[$share_name]['landing_zone']);
            unset($shares_options[$share_name]);
            continue;
        }
        // A share can't have more copies than there are drives. The isset()
        // guard avoids an undefined-index notice for shares that only define
        // other per-share options.
        if (isset($share_options['num_copies']) && $share_options['num_copies'] > count($storage_pool_drives)) {
            $share_options['num_copies'] = count($storage_pool_drives);
        }
        if (!isset($share_options['landing_zone'])) {
            global $config_file, $smb_config_file;
            gh_log(WARN, "Found a share ($share_name) defined in $config_file with no path in $smb_config_file. Either add this share in $smb_config_file, or remove it from $config_file, then restart Greyhole.");
            return FALSE;
        }
        if (!isset($share_options['delete_moves_to_trash'])) {
            $share_options['delete_moves_to_trash'] = $delete_moves_to_trash;
        }
        if (isset($share_options['dir_selection_algorithm'])) {
            foreach ($share_options['dir_selection_algorithm'] as $ds) {
                $ds->update();
            }
        } else {
            $share_options['dir_selection_algorithm'] = $dir_selection_algorithm;
        }
        if (isset($share_options['dir_selection_groups'])) {
            unset($share_options['dir_selection_groups']);
        }
        $shares_options[$share_name] = $share_options;

        // Validate that the landing zone is NOT a subdirectory of a storage pool drive!
        // Both paths get a trailing slash before comparing, so that a drive
        // like /mnt/hdd1 does not match a landing zone under /mnt/hdd10.
        $landing_zone_path = rtrim($share_options['landing_zone'], '/') . '/';
        foreach ($storage_pool_drives as $key => $sp_drive) {
            if (mb_strpos($landing_zone_path, rtrim($sp_drive, '/') . '/') === 0) {
                gh_log(CRITICAL, "Found a share ($share_name), with path " . $share_options['landing_zone'] . ", which is INSIDE a storage pool drive ($sp_drive). Share directories should never be inside a directory that you have in your storage pool.\nFor your shares to use your storage pool, you just need them to have 'vfs objects = greyhole' in their (smb.conf) config; their location on your file system is irrelevant.");
            }
        }
    }

    // $db_engine (like $db_host, $db_path, ...) only exists here if the option
    // was found in the config file (see the default branch of the parser above).
    if (!isset($db_engine)) {
        $db_engine = 'mysql';
    } else {
        $db_engine = mb_strtolower($db_engine);
    }

    // Exports $db_use_mysql or $db_use_sqlite
    global ${"db_use_$db_engine"};
    ${"db_use_$db_engine"} = TRUE;

    if (!isset($max_queued_tasks)) {
        if ($db_engine == 'sqlite') {
            $max_queued_tasks = 1000;
        } else {
            $max_queued_tasks = 10000000;
        }
    }

    if (!isset($memory_limit)) {
        $memory_limit = '128M';
        ini_set('memory_limit',$memory_limit);
    }
    if (isset($memory_limit)) {
        // Convert PHP's shorthand notation (K, M or G suffix, in either case)
        // into bytes; memory_check() divides by this value, so it must be numeric.
        $multipliers = array('K' => 1024, 'M' => 1048576, 'G' => 1073741824);
        $memory_limit_text = trim($memory_limit);
        $suffix = strtoupper(substr($memory_limit_text, -1));
        if (isset($multipliers[$suffix])) {
            $memory_limit = substr($memory_limit_text, 0, -1) * $multipliers[$suffix];
        }
    }

    $db_options = (object) array(
        'engine' => $db_engine,
        'schema' => "/usr/share/greyhole/schema-$db_engine.sql"
    );
    if ($db_options->engine == 'sqlite') {
        $db_options->db_path = $db_path;
        $db_options->dbh = null; // internal handle to use with sqlite
    } else {
        $db_options->host = $db_host;
        $db_options->user = $db_user;
        $db_options->pass = $db_pass;
        $db_options->name = $db_name;
    }

    /*
       Small abstraction layer for supporting MySQL and SQLite based
       on a user choice. Specify

          db_engine = sqlite
          db_path = /var/cache/greyhole.sqlite

       in /etc/greyhole.conf to enable sqlite support, otherwise the
       standard Greyhole MySQL support will be used.

       Carlos Puchol, Amahi
    */
    if ($db_options->engine == 'sqlite') {
        function db_connect() {
            global $db_options;
            if (!file_exists($db_options->db_path)) {
                // create the db automatically if it does not exist
                // (paths come from the config file: escape them for the shell)
                system("sqlite3 " . escapeshellarg($db_options->db_path) . " < " . escapeshellarg($db_options->schema));
            }
            $db_options->dbh = new PDO("sqlite:" . $db_options->db_path);
            return $db_options->dbh;
        }

        function db_query($query) {
            global $db_options;
            return $db_options->dbh->query($query);
        }

        function db_escape_string($string) {
            global $db_options;
            // PDO::quote() adds surrounding quotes; strip them to mimic mysql_real_escape_string().
            $escaped_string = $db_options->dbh->quote($string);
            return substr($escaped_string, 1, strlen($escaped_string)-2);
        }

        function db_fetch_object($result) {
            return $result->fetchObject();
        }

        function db_free_result($result) {
            return TRUE;
        }

        function db_insert_id() {
            global $db_options;
            return $db_options->dbh->lastInsertId();
        }

        function db_error() {
            global $db_options;
            $error = $db_options->dbh->errorInfo();
            return $error[2];
        }
    } else {
        // MySQL
        function db_connect() {
            global $db_options;
            $connected = mysql_connect($db_options->host, $db_options->user, $db_options->pass);
            if ($connected) {
                $connected = mysql_select_db($db_options->name);
                if ($connected) {
                    db_query("SET SESSION group_concat_max_len = 1048576");
                    db_query("SET SESSION wait_timeout = 86400"); # Allow 24h fsck!
                }
            }
            return $connected;
        }

        function db_query($query) {
            return mysql_query($query);
        }

        function db_escape_string($string) {
            return mysql_real_escape_string($string);
        }

        function db_fetch_object($result) {
            return mysql_fetch_object($result);
        }

        function db_free_result($result) {
            return mysql_free_result($result);
        }

        function db_insert_id() {
            return mysql_insert_id();
        }

        function db_error() {
            return mysql_error();
        }
    }

    /**
     * Bring the schema of an existing database up to date.
     * Each migration first inspects the current schema, and only alters it
     * when the change has not been applied yet.
     */
    function db_migrate() {
        global $db_options, $db_use_mysql, $db_use_sqlite;

        // Migration #1 (complete = frozen|thawed)
        if (@$db_use_mysql) {
            $query = "DESCRIBE tasks";
            $result = db_query($query) or die("Can't describe tasks with query: $query - Error: " . db_error());
            while ($row = db_fetch_object($result)) {
                if ($row->Field == 'complete') {
                    if ($row->Type == "enum('yes','no')") {
                        // migrate
                        db_query("ALTER TABLE tasks CHANGE complete complete ENUM('yes','no','frozen','thawed') NOT NULL");
                        db_query("ALTER TABLE tasks_completed CHANGE complete complete ENUM('yes','no','frozen','thawed') NOT NULL");
                    }
                    break;
                }
            }
        } else if (@$db_use_sqlite) {
            $query = "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'tasks'";
            $result = db_query($query) or die("Can't describe tasks with query: $query - Error: " . db_error());
            while ($row = db_fetch_object($result)) {
                if (strpos($row->sql, 'complete BOOL NOT NULL') !== FALSE) {
                    // migrate; not supported! @see http://sqlite.org/omitted.html
                    gh_log(CRITICAL, "Your SQLite database is not up to date. Column tasks.complete needs to be a TINYTEXT. Please fix, then retry.");
                }
            }
        }

        // Migration #2 (complete = idle)
        if (@$db_use_mysql) {
            $query = "DESCRIBE tasks";
            $result = db_query($query) or die("Can't describe tasks with query: $query - Error: " . db_error());
            while ($row = db_fetch_object($result)) {
                if ($row->Field == 'complete') {
                    if ($row->Type == "enum('yes','no','frozen','thawed')") {
                        // migrate
                        db_query("ALTER TABLE tasks CHANGE complete complete ENUM('yes','no','frozen','thawed','idle') NOT NULL");
                        db_query("ALTER TABLE tasks_completed CHANGE complete complete ENUM('yes','no','frozen','thawed','idle') NOT NULL");
                    }
                    break;
                }
            }
        }

        // Migration #3 (larger settings.value: tinytext > text)
        if (@$db_use_mysql) {
            $query = "DESCRIBE settings";
            $result = db_query($query) or die("Can't describe settings with query: $query - Error: " . db_error());
            while ($row = db_fetch_object($result)) {
                if ($row->Field == 'value') {
                    if ($row->Type == "tinytext") {
                        // migrate
                        db_query("ALTER TABLE settings CHANGE value value TEXT CHARACTER SET latin1 COLLATE latin1_swedish_ci NOT NULL");
                    }
                    break;
                }
            }
        }

        // Migration #4 (new index for find_next_task function, used by simplify_task, and also for execute_next_task function; also remove deprecated indexes)
        if (@$db_use_mysql) {
            $query = "SHOW INDEX FROM tasks WHERE Key_name = 'find_next_task'";
            $result = db_query($query) or die("Can't show index with query: $query - Error: " . db_error());
            if (db_fetch_object($result) === FALSE) {
                // migrate
                db_query("ALTER TABLE tasks ADD INDEX find_next_task (complete, share(64), id)");
            }

            $query = "SHOW INDEX FROM tasks WHERE Key_name = 'incomplete_open'";
            $result = db_query($query) or die("Can't show index with query: $query - Error: " . db_error());
            if (db_fetch_object($result)) {
                // migrate
                db_query("ALTER TABLE tasks DROP INDEX incomplete_open");
            }

            $query = "SHOW INDEX FROM tasks WHERE Key_name = 'subsequent_writes'";
            $result = db_query($query) or die("Can't show index with query: $query - Error: " . db_error());
            if (db_fetch_object($result)) {
                // migrate
                db_query("ALTER TABLE tasks DROP INDEX subsequent_writes");
            }

            $query = "SHOW INDEX FROM tasks WHERE Key_name = 'unneeded_unlinks'";
            $result = db_query($query) or die("Can't show index with query: $query - Error: " . db_error());
            if (db_fetch_object($result)) {
                // migrate
                db_query("ALTER TABLE tasks DROP INDEX unneeded_unlinks");
            }
        }

        // Migration #5 (fix find_next_task index)
        if (@$db_use_mysql) {
            $query = "SHOW INDEX FROM tasks WHERE Key_name = 'find_next_task' and Column_name = 'share'";
            $result = db_query($query) or die("Can't show index with query: $query - Error: " . db_error());
            if (db_fetch_object($result) !== FALSE) {
                // migrate
                // MySQL requires a comma between the alter specifications.
                db_query("ALTER TABLE tasks DROP INDEX find_next_task, ADD INDEX find_next_task (complete, id)");
            }
        }

        // Migration #6 (new indexes for md5_worker_thread/gh_check_md5 functions)
        if (@$db_use_mysql) {
            $query = "SHOW INDEX FROM tasks WHERE Key_name = 'md5_worker'";
            $result = db_query($query) or die("Can't show index with query: $query - Error: " . db_error());
            if (db_fetch_object($result) === FALSE) {
                // migrate
                db_query("ALTER TABLE tasks ADD INDEX md5_worker (action, complete, additional_info(100), id)");
            }

            $query = "SHOW INDEX FROM tasks WHERE Key_name = 'md5_checker'";
            $result = db_query($query) or die("Can't show index with query: $query - Error: " . db_error());
            if (db_fetch_object($result) === FALSE) {
                // migrate
                db_query("ALTER TABLE tasks ADD INDEX md5_checker (action, share(64), full_path(255), complete)");
            }

            $query = "DESCRIBE tasks";
            $result = db_query($query) or die("Can't describe tasks with query: $query - Error: " . db_error());
            while ($row = db_fetch_object($result)) {
                if ($row->Field == 'additional_info') {
                    if ($row->Type == "tinytext") {
                        // migrate
                        db_query("ALTER TABLE tasks CHANGE additional_info additional_info TEXT CHARACTER SET latin1 COLLATE latin1_swedish_ci NULL");
                    }
                    break;
                }
            }
        }
    }

    if (strtolower($greyhole_log_file) == 'syslog') {
        openlog("Greyhole", LOG_PID, LOG_USER);
    }

    if (!isset($balance_modified_files)) {
        global $balance_modified_files;
        $balance_modified_files = FALSE;
    }

    return TRUE;
}
/**
 * Normalize a path: remove a leading "./" and collapse runs of slashes.
 *
 * @param string $dir Path to clean; may be empty.
 * @return string The cleaned path.
 */
function clean_dir($dir) {
    $dir = (string) $dir;
    // strncmp() is safe on empty and one-character strings, unlike reading
    // $dir[0] and $dir[1] directly.
    if (strncmp($dir, './', 2) === 0) {
        // The cast covers old PHP versions, where substr() returns FALSE
        // when nothing is left after the prefix.
        $dir = (string) substr($dir, 2);
    }
    // Use the same (byte-wise) matching for the test and the replacement, so
    // that the loop is guaranteed to end.
    while (strpos($dir, '//') !== FALSE) {
        $dir = str_replace("//", "/", $dir);
    }
    return $dir;
}
/**
 * Split a path into its directory part and its file name.
 *
 * @param string $full_path Path to split.
 * @return array Two elements: dirname($full_path), then basename($full_path).
 */
function explode_full_path($full_path) {
    $parent_dir = dirname($full_path);
    $filename = basename($full_path);
    return array($parent_dir, $filename);
}
/**
 * Write a message to the Greyhole log (file, syslog or stdout).
 *
 * Messages whose level is numerically greater than the global $log_level are
 * dropped. Logging a CRITICAL message terminates the process with exit code 1.
 *
 * @param int    $local_log_level One of the log level constants (PERF ... CRITICAL).
 * @param string $text            Message to log.
 * @return void
 */
function gh_log($local_log_level, $text) {
    global $greyhole_log_file, $log_level, $log_memory_usage, $action, $log_to_stdout;
    if ($local_log_level > $log_level) {
        return;
    }

    // Take the time once, so that the seconds and the microseconds printed
    // below always belong to the same instant.
    $utimestamp = microtime(true);
    $timestamp = floor($utimestamp);
    $date = date("M d H:i:s", (int) $timestamp);
    if ($log_level >= PERF) {
        // Zero-padded to 6 digits; otherwise 5 microseconds would read as ".5".
        $date .= '.' . sprintf('%06d', (int) floor(($utimestamp - $timestamp) * 1000000));
    }
    $log_text = sprintf("%s%s%s\n",
        "$date $local_log_level $action: ",
        $text,
        $log_memory_usage ? " [" . memory_get_usage() . "]" : ''
    );

    if (isset($log_to_stdout)) {
        echo $log_text;
    } else {
        if (strtolower($greyhole_log_file) == 'syslog') {
            // Greyhole's TEST (8) and PERF (9) levels are not valid syslog
            // severities: values above LOG_DEBUG spill into the facility bits,
            // and would be sent as user.emerg / user.alert.
            $worked = syslog(min($local_log_level, LOG_DEBUG), $log_text);
        } else {
            $worked = error_log($log_text, 3, $greyhole_log_file);
        }
        if (!$worked) {
            // Fall back to PHP's default error log
            error_log(trim($log_text));
        }
    }

    if ($local_log_level === CRITICAL) {
        exit(1);
    }
}
/**
 * Shutdown function (see register_shutdown_function): if PHP recorded an
 * error, log its message and where it happened.
 *
 * @return void
 */
function gh_shutdown() {
    $err = error_get_last();
    if (!$err) {
        return;
    }
    $location = basename($err['file']) . '[L' . $err['line'] . '] ';
    gh_log(ERROR, "PHP Fatal Error: " . $err['message'] . "; BT: " . $location);
}
/**
 * PHP error handler (see set_error_handler): route PHP errors to the Greyhole log.
 *
 * $errfile, $errline and $errcontext are optional, because PHP 8 no longer
 * passes $errcontext to error handlers.
 *
 * @param int    $errno      Level of the error raised (E_* constant).
 * @param string $errstr     Error message.
 * @param string $errfile    File in which the error was raised.
 * @param int    $errline    Line at which the error was raised.
 * @param mixed  $errcontext Unused.
 * @return bool Always TRUE, to prevent PHP's internal error handler from running.
 */
function gh_error_handler($errno, $errstr, $errfile = '', $errline = 0, $errcontext = NULL) {
    // Errors silenced with @, or excluded by error_reporting, are ignored.
    // A mask test is required: as of PHP 8, error_reporting() is no longer 0
    // while the @ operator is in effect.
    if (!(error_reporting() & $errno)) {
        // Ignored (@) warning
        return TRUE;
    }

    switch ($errno) {
    case E_ERROR:
    case E_PARSE:
    case E_CORE_ERROR:
    case E_COMPILE_ERROR:
        gh_log(CRITICAL, "PHP Error [$errno]: $errstr in $errfile on line $errline");
        break;

    case E_WARNING:
    case E_COMPILE_WARNING:
    case E_CORE_WARNING:
    case E_NOTICE:
        global $greyhole_log_file;
        if ($errstr == "fopen($greyhole_log_file): failed to open stream: Permission denied") {
            // We want to ignore this warning. Happens when regular users try to use greyhole, and greyhole tries to log something.
            // What would have been logged will be echoed instead.
            return TRUE;
        }
        gh_log(WARN, "PHP Warning [$errno]: $errstr in $errfile on line $errline; BT: " . get_debug_bt());
        break;

    default:
        gh_log(WARN, "PHP Unknown Error [$errno]: $errstr in $errfile on line $errline");
        break;
    }

    // Don't execute PHP internal error handler
    return TRUE;
}
/**
 * Build a one-line backtrace of the current call stack, outermost call first,
 * with calls separated by " => ". The frames of gh_error_handler() and of this
 * function itself are left out.
 *
 * @return string e.g. "file.php[L12] outer(1,2) => file.php[L34] inner(Array)"
 */
function get_debug_bt() {
    $bt = '';
    foreach (debug_backtrace() as $d) {
        if ($d['function'] == 'gh_error_handler' || $d['function'] == 'get_debug_bt') { continue; }
        if ($bt != '') {
            $bt = " => $bt";
        }
        $prefix = '';
        if (isset($d['file'])) {
            $prefix = basename($d['file']) . '[L' . $d['line'] . '] ';
        }

        // Arrays and objects can't be safely converted to strings by implode()
        // (notice or fatal error), so describe them by their type instead.
        $printable_args = array();
        if (isset($d['args']) && is_array($d['args'])) {
            foreach ($d['args'] as $v) {
                if (is_object($v)) {
                    $printable_args[] = get_class($v);
                } else if (is_array($v)) {
                    $printable_args[] = 'Array';
                } else {
                    $printable_args[] = $v;
                }
            }
        }

        // Frames are visited innermost first, so each new call is prepended.
        $bt = $prefix . $d['function'] .'(' . implode(',', $printable_args) . ')' . $bt;
    }
    return $bt;
}
/**
 * Format a number of bytes using the largest fitting unit (B, KB, MB, GB or TB;
 * 1KB = 1024B). Values of 100 or less are shown with 1 or 2 decimals.
 *
 * @param int|float $bytes Number of bytes; can be negative.
 * @param bool      $html  When TRUE, the unit is wrapped in a
 *                         <span class="i18n-UNIT"> element, after a space.
 * @return string e.g. "1.50KB", or "1.50 <span class="i18n-KB">KB</span>".
 */
function bytes_to_human($bytes, $html=TRUE) {
    $unit_names = array('B', 'KB', 'MB', 'GB', 'TB');
    $last_unit = count($unit_names) - 1;

    // >= (and not >), so that exactly 1024 of a unit becomes 1 of the next one.
    $unit_index = 0;
    while ($unit_index < $last_unit && abs($bytes) >= 1024) {
        $bytes /= 1024;
        $unit_index++;
    }
    $units = $unit_names[$unit_index];

    $decimals = (abs($bytes) > 100 ? 0 : (abs($bytes) > 10 ? 1 : 2));
    if ($html) {
        return number_format($bytes, $decimals) . " <span class=\"i18n-$units\">$units</span>";
    } else {
        return number_format($bytes, $decimals) . $units;
    }
}
/**
 * Format a duration as hours, minutes and seconds, e.g. "1h 2m 5s".
 * The hours and minutes parts are left out when they would be zero; the
 * seconds part is always present.
 *
 * @param int|float $seconds Duration, in seconds.
 * @return string Human-readable duration.
 */
function duration_to_human($seconds) {
    $displayable_duration = '';
    // >= (and not >), so that exactly 3600s is shown as 1h, and exactly 60s as 1m.
    if ($seconds >= 60*60) {
        $hours = floor($seconds / (60*60));
        $displayable_duration .= $hours . 'h ';
        $seconds -= $hours * (60*60);
    }
    if ($seconds >= 60) {
        $minutes = floor($seconds / 60);
        $displayable_duration .= $minutes . 'm ';
        $seconds -= $minutes * 60;
    }
    $displayable_duration .= $seconds . 's';
    return $displayable_duration;
}
/**
 * Find the landing zone (the path found in the Samba configuration) of a share.
 *
 * @param string $share Share name.
 * @return string|bool The landing zone of the share, or of the trash share if
 *                     $share is one of the trash share names; FALSE (after
 *                     logging a warning) if the share is unknown.
 */
function get_share_landing_zone($share) {
    global $shares_options, $trash_share_names;

    // Regular Greyhole share
    if (isset($shares_options[$share]['landing_zone'])) {
        return $shares_options[$share]['landing_zone'];
    }

    // Trash share
    if (in_array($share, $trash_share_names)) {
        global $trash_share;
        return $trash_share['landing_zone'];
    }

    global $config_file, $smb_config_file;
    gh_log(WARN, "  Found a share ($share) with no path in $smb_config_file, or missing it's num_copies[$share] config in $config_file. Skipping.");
    return FALSE;
}
// File helpers (gh_filesize, gh_fileowner, gh_filegroup, gh_fileperms,
// gh_is_file, gh_fileinode, gh_file_deviceid, gh_rename).
// When PHP's integers are 32-bit, the helpers shell out to stat / test / mv
// instead of using PHP's built-in file functions. The decision is based on
// PHP_INT_SIZE rather than on the kernel architecture: `uname -m` says nothing
// about the PHP binary itself, and 'x86_64' is not the only 64-bit architecture.
$arch = exec('uname -m');
if (PHP_INT_SIZE < 8) {
    gh_log(DEBUG, "32-bit system detected: Greyhole will NOT use PHP built-in file functions.");

    // Note: the results of stat are compared to '' instead of using empty(),
    // because "0" is a valid answer (empty file, root user, root group).

    /** @return float|bool Size of the file in bytes, or FALSE on failure. */
    function gh_filesize($filename) {
        $result = exec("stat -c %s ".escapeshellarg($filename)." 2>/dev/null");
        if ($result === '' || $result === FALSE) {
            return FALSE;
        }
        return (float) $result;
    }

    /** @return int|bool User ID of the owner of the file, or FALSE on failure. */
    function gh_fileowner($filename) {
        $result = exec("stat -c %u ".escapeshellarg($filename)." 2>/dev/null");
        if ($result === '' || $result === FALSE) {
            return FALSE;
        }
        return (int) $result;
    }

    /** @return int|bool Group ID of the file, or FALSE on failure. */
    function gh_filegroup($filename) {
        $result = exec("stat -c %g ".escapeshellarg($filename)." 2>/dev/null");
        if ($result === '' || $result === FALSE) {
            return FALSE;
        }
        return (int) $result;
    }

    /** @return string|bool Permissions as a 4-digit octal string (e.g. "0755"), or FALSE on failure. */
    function gh_fileperms($filename) {
        $result = exec("stat -c %a ".escapeshellarg($filename)." 2>/dev/null");
        if ($result === '' || $result === FALSE) {
            return FALSE;
        }
        // Pad to 4 digits, like the built-in variant: 755 => 0755, 4755 => 4755
        return str_pad($result, 4, '0', STR_PAD_LEFT);
    }

    /** @return bool TRUE if $filename exists and is a regular file. */
    function gh_is_file($filename) {
        exec('[ -f '.escapeshellarg($filename).' ]', $tmp, $result);
        return $result === 0;
    }

    /** @return string|bool "deviceid_inode" of the file, or FALSE on failure. */
    function gh_fileinode($filename) {
        // This function returns deviceid_inode to make sure this value will be different for files on different devices.
        $result = exec("stat -c '%d_%i' ".escapeshellarg($filename)." 2>/dev/null");
        if (empty($result)) {
            return FALSE;
        }
        return (string) $result;
    }

    /** @return string|bool ID of the device containing the file, or FALSE on failure. */
    function gh_file_deviceid($filename) {
        $result = exec("stat -c '%d' ".escapeshellarg($filename)." 2>/dev/null");
        if (empty($result)) {
            return FALSE;
        }
        return (string) $result;
    }

    /** @return bool TRUE if the file was moved successfully. */
    function gh_rename($filename, $target_filename) {
        exec("mv ".escapeshellarg($filename)." ".escapeshellarg($target_filename)." 2>/dev/null", $output, $result);
        return $result === 0;
    }
} else {
    gh_log(DEBUG, "64-bit system detected: Greyhole will use PHP built-in file functions.");

    /** @return int|bool Size of the file in bytes, or FALSE on failure. */
    function gh_filesize($filename) {
        return filesize($filename);
    }

    /** @return int|bool User ID of the owner of the file, or FALSE on failure. */
    function gh_fileowner($filename) {
        return fileowner($filename);
    }

    /** @return int|bool Group ID of the file, or FALSE on failure. */
    function gh_filegroup($filename) {
        return filegroup($filename);
    }

    /** @return string Last 4 octal digits of the file mode (e.g. "0755"). */
    function gh_fileperms($filename) {
        return mb_substr(decoct(fileperms($filename)), -4);
    }

    /** @return bool TRUE if $filename exists and is a regular file. */
    function gh_is_file($filename) {
        return is_file($filename);
    }

    /** @return string|bool "deviceid_inode" of the file, or FALSE on failure. */
    function gh_fileinode($filename) {
        // This function returns deviceid_inode to make sure this value will be different for files on different devices.
        $stat = @stat($filename);
        if ($stat === FALSE) {
            return FALSE;
        }
        return $stat['dev'] . '_' . $stat['ino'];
    }

    /** @return int|bool ID of the device containing the file, or FALSE on failure. */
    function gh_file_deviceid($filename) {
        $stat = @stat($filename);
        if ($stat === FALSE) {
            return FALSE;
        }
        return $stat['dev'];
    }

    /** @return bool TRUE if the file was renamed successfully. */
    function gh_rename($filename, $target_filename) {
        return rename($filename, $target_filename);
    }
}
/**
 * Abort (by logging a CRITICAL message) when more than 95% of the configured
 * memory limit is in use.
 *
 * The global $memory_limit is expected to be a number of bytes. When it is
 * not a positive number (not set yet, 0, -1 for "unlimited", or a value that
 * was not converted to bytes), there is nothing to compare against, and the
 * check is skipped instead of dividing by zero or by a meaningless value.
 *
 * @return void
 */
function memory_check(){
    global $memory_limit;
    if (!is_numeric($memory_limit) || $memory_limit <= 0) {
        return;
    }
    $used = (memory_get_usage() / $memory_limit) * 100;
    if ($used > 95) {
        gh_log(CRITICAL, $used . '% memory usage, exiting. Please increase memory_limit in /etc/greyhole.conf');
    }
}
/**
 * Iterates over the metadata files of a share, one directory at a time.
 *
 * Starting at $path, each step loads the metafiles of the next directory that
 * contains any, looking in every metastore, and yields them as one array
 * (file name => result of get_metafiles_for_file()). Sub-directories are
 * queued as they are found, and visited in later steps.
 *
 * The #[\ReturnTypeWillChange] lines are attributes on PHP 8 (they silence the
 * return type deprecation notices of the Iterator interface), and plain
 * comments on older PHP versions.
 */
class metafile_iterator implements Iterator {
    private $path;
    private $share;
    private $load_nok_metafiles;
    private $quiet;
    private $check_symlink;
    // Metafiles found by the last call to next()
    private $metafiles = array();
    private $metastores = array();
    private $dir_handle;

    // The following properties used to be created on the fly, which made them
    // public; they are declared public to remain accessible in the same way.
    // Directories (relative to the share) that still need to be visited
    public $directory_stack = array();
    // Directory currently being read
    public $dir;
    // "<metastore>/<share>/" of the metastore currently being read
    public $base;

    public function __construct($share, $path, $load_nok_metafiles=FALSE, $quiet=FALSE, $check_symlink=TRUE) {
        $this->quiet = $quiet;
        $this->share = $share;
        $this->path = $path;
        $this->check_symlink = $check_symlink;
        $this->load_nok_metafiles = $load_nok_metafiles;
    }

    #[\ReturnTypeWillChange]
    public function rewind(){
        $this->metastores = get_metastores();
        $this->directory_stack = array($this->path);
        $this->dir_handle = NULL;
        $this->metafiles = array();
        $this->next();
    }

    #[\ReturnTypeWillChange]
    public function current(){
        return $this->metafiles;
    }

    #[\ReturnTypeWillChange]
    public function key() {
        return count($this->metafiles);
    }

    #[\ReturnTypeWillChange]
    public function next() {
        $this->metafiles = array();

        while (is_array($this->directory_stack) && count($this->directory_stack) > 0) {
            $this->dir = array_pop($this->directory_stack);
            if (!$this->quiet) {
                gh_log(DEBUG, "Loading metadata files for (dir) " . clean_dir($this->share . (!empty($this->dir) ? "/" . $this->dir : "")) . " ...");
            }

            foreach ($this->metastores as $metastore) {
                $this->base = "$metastore/".$this->share."/";
                if (!file_exists($this->base.$this->dir)) {
                    continue;
                }
                if ($this->dir_handle = opendir($this->base.$this->dir)) {
                    while (false !== ($file = readdir($this->dir_handle))) {
                        memory_check();
                        if ($file == '.' || $file == '..') {
                            continue;
                        }
                        if (!empty($this->dir)) {
                            $full_filename = $this->dir . '/' . $file;
                        } else {
                            $full_filename = $file;
                        }
                        if (is_dir($this->base.$full_filename)) {
                            // Visit this sub-directory in a later step
                            $this->directory_stack[] = $full_filename;
                        } else {
                            // NOTE(review): this removes every occurrence of "<path>/",
                            // not only the leading one - confirm that is intended.
                            $full_filename = str_replace("$this->path/",'',$full_filename);
                            if (isset($this->metafiles[$full_filename])) {
                                // Already loaded from another metastore
                                continue;
                            }
                            $this->metafiles[$full_filename] = get_metafiles_for_file($this->share, "$this->dir", $file, $this->load_nok_metafiles, $this->quiet, $this->check_symlink);
                        }
                    }
                    closedir($this->dir_handle);
                    // The same sub-directory can exist in multiple metastores
                    $this->directory_stack = array_unique($this->directory_stack);
                }
            }

            // Stop at the first directory that yielded metafiles
            if (count($this->metafiles) > 0) {
                break;
            }
        }

        if (!$this->quiet) {
            gh_log(DEBUG, 'Found ' . count($this->metafiles) . ' metadata files.');
        }
        return $this->metafiles;
    }

    #[\ReturnTypeWillChange]
    public function valid(){
        return count($this->metafiles) > 0;
    }
}
/**
 * _getopt(): Ver. 1.3 2009/05/30 - getopt() work-alike that also handles long
 * options on PHP versions older than 5.3.0.
 * Original page: http://www.ntu.beautifulworldco.com/weblog/?p=526
 *
 * Usage: _getopt ( [$flag,] $short_option [, $long_option] );
 *   1. _getopt ( $short_option );
 *        same as getopt ( $short_option ).
 *   2. _getopt ( $short_option, $long_option );
 *        same as getopt ( $short_option, $long_option ).
 *   3. _getopt ( $flag, $short_option );
 *   4. _getopt ( $flag, $short_option, $long_option );
 *        $flag is your own argument list, either a string such as
 *          "-f value_f -ab --required 9 --optional=PK --option -v test -k"
 *        or an array such as
 *          array ( "-f", "value_f", "-ab", "--required", "9", "--optional=PK", "--option" )
 *
 * Forms 1 and 3 always delegate to the native getopt($short_option). Forms 2
 * and 4 delegate to the native getopt($short_option, $long_option) when
 * PHP_VERSION_ID >= 50300; the hand-written parser below only runs on older
 * PHP versions. A string $flag is split with split_para().
 *
 * Changes compared to the previous revision:
 *  - string offsets use [] instead of {} (the {} form is a parse error on PHP 8);
 *  - a stray ':' in $short_option no longer causes an endless loop;
 *  - characters/arguments past the end are no longer read without checking.
 *
 * @return array|false Parsed options (option => value, FALSE for value-less
 *                     options, array of values when an option is repeated),
 *                     or whatever the native getopt() returns.
 */
function _getopt ( ) {
    if ( func_num_args() == 1 ) {
        return getopt( func_get_arg ( 0 ) );
    } else if ( func_num_args() == 2 ) {
        if ( ! is_array ( func_get_arg ( 1 ) ) ) {
            // _getopt ( $flag, $short_option ): the custom $flag has always been ignored here.
            return getopt( func_get_arg ( 1 ) );
        }
        $short_option = func_get_arg ( 0 );
        $long_option = func_get_arg ( 1 );
        if (PHP_VERSION_ID >= 50300) { return getopt($short_option, $long_option); }
        $flag = isset( $GLOBALS['argv'] ) ? $GLOBALS['argv'] : array();
    } else if ( func_num_args() == 3 ) {
        $flag = func_get_arg ( 0 );
        $short_option = func_get_arg ( 1 );
        $long_option = func_get_arg ( 2 );
        if (PHP_VERSION_ID >= 50300) { return getopt($short_option, $long_option); }
    } else {
        exit ( "wrong options\n" );
    }

    $short_option = trim ( $short_option );
    $short_no_value = array();
    $short_required_value = array();
    $short_optional_value = array();
    $long_no_value = array();
    $long_required_value = array();
    $long_optional_value = array();
    $options = array();

    // Classify short options: "a" (no value), "a:" (required value), "a::" (optional value).
    $spec_length = strlen ( $short_option );
    for ( $i = 0; $i < $spec_length; ) {
        $letter = $short_option[$i];
        if ( $letter == ":" ) {
            // A ':' that does not follow an option letter is meaningless; skip it
            // (the previous code looped forever here because $i was never advanced).
            $i++;
            continue;
        }
        $next_char = ( $i + 1 < $spec_length ) ? $short_option[$i+1] : "";
        $after_next = ( $i + 2 < $spec_length ) ? $short_option[$i+2] : "";
        if ( $next_char != ":" ) {
            $short_no_value[] = $letter;
            $i++;
        } else if ( $after_next != ":" ) {
            $short_required_value[] = $letter;
            $i += 2;
        } else {
            $short_optional_value[] = $letter;
            $i += 3;
        }
    }

    // Classify long options by their "::" / ":" suffix.
    foreach ( $long_option as $a ) {
        if ( substr( $a, -2 ) == "::" ) {
            $long_optional_value[] = substr( $a, 0, -2 );
        } else if ( substr( $a, -1 ) == ":" ) {
            $long_required_value[] = substr( $a, 0, -1 );
        } else {
            $long_no_value[] = $a;
        }
    }

    if ( is_array ( $flag ) ) {
        $flag_array = $flag;
    } else {
        $flag = "- $flag";
        $flag_array = split_para( $flag );
    }

    $flag_count = count( $flag_array );
    for ( $i = 0; $i < $flag_count; ) {
        $arg = isset( $flag_array[$i] ) ? $flag_array[$i] : NULL;
        // NULL when there is no following argument (previously an undefined-offset read).
        $next = isset( $flag_array[$i+1] ) ? $flag_array[$i+1] : NULL;

        // Skip anything that is not an option: empty values, "0", a lone "-", plain words.
        if ( ! is_string( $arg ) || ! $arg || $arg == "-" || $arg[0] != "-" ) {
            $i++;
            continue;
        }

        if ( substr( $arg, 0, 2 ) == "--" ) {
            if ( strpos( $arg, '=' ) !== FALSE ) {
                // --key=value
                list($key, $value) = explode('=', substr($arg, 2), 2);
                if ( in_array ( $key, $long_required_value, TRUE ) || in_array ( $key, $long_optional_value, TRUE ) ) {
                    $options[$key][] = $value;
                }
                $i++;
                continue;
            }
            // --key [value]
            $key = substr( $arg, 2 );
            if ( in_array( $key, $long_required_value, TRUE ) ) {
                $options[$key][] = $next;
                $i += 2;
            } else if ( in_array( $key, $long_optional_value, TRUE ) ) {
                if ( $next != "" && $next[0] != "-" ) {
                    $options[$key][] = $next;
                    $i += 2;
                } else {
                    $options[$key][] = FALSE;
                    $i++;
                }
            } else if ( in_array( $key, $long_no_value, TRUE ) ) {
                $options[$key][] = FALSE;
                $i++;
            } else {
                // Unknown long option: ignore it.
                $i++;
            }
            continue;
        }

        // Cluster of short options, e.g. "-ab" or "-fvalue".
        $plus_i = 1; // stays 1 unless an option in the cluster consumed the argument(s) itself
        $arg_length = strlen( $arg );
        for ( $j = 1; $j < $arg_length; $j++ ) {
            $letter = $arg[$j];
            $takes_required = in_array( $letter, $short_required_value, TRUE );
            $takes_optional = in_array( $letter, $short_optional_value, TRUE );
            if ( $takes_required || $takes_optional ) {
                if ( $j == $arg_length - 1 ) {
                    // Last letter of the cluster: the value, if any, is the next argument.
                    if ( $takes_required ) {
                        $options[$letter][] = $next;
                        $i += 2;
                    } else if ( $next != "" && $next[0] != "-" ) {
                        $options[$letter][] = $next;
                        $i += 2;
                    } else {
                        $options[$letter][] = FALSE;
                        $i++;
                    }
                } else {
                    // The rest of the cluster is the value ("-fvalue").
                    $options[$letter][] = substr ( $arg, $j + 1 );
                    $i++;
                }
                $plus_i = 0;
                break;
            }
            if ( in_array ( $letter, $short_no_value, TRUE ) ) {
                $options[$letter][] = FALSE;
                continue;
            }
            // Unknown letter: stop parsing this cluster.
            break;
        }
        $i += $plus_i;
    }

    // Options seen only once are returned as a scalar instead of a one-element array.
    foreach ( $options as $key => $value ) {
        if ( count ( $value ) == 1 ) {
            $options[ $key ] = $value[0];
        }
    }
    return $options;
}
/**
 * Helper for split_para(): turns one raw candidate into its final token.
 *
 * Removes the characters at the offsets listed in $quote_mark (the grouping
 * quotes), then lets PHP interpret the remaining backslash escapes exactly as
 * it would inside a double-quoted string.
 *
 * @param string $cand       Raw slice of the pattern.
 * @param array  $quote_mark Offsets (relative to $cand) of grouping quotes to drop.
 * @return string The token; "" when nothing is left.
 */
function _split_para_token ( $cand, $quote_mark ) {
    $token = "";
    $cand_length = strlen ( $cand );
    for ( $j = 0; $j < $cand_length; $j++ ) {
        if ( in_array ( $j, $quote_mark ) ) {
            continue;
        }
        $token .= $cand[$j];
    }
    if ( $token === "" ) {
        return "";
    }
    // NOTE(review): eval() is kept for backward compatibility (it resolves \" \\ \n ...
    // like a double-quoted PHP string), but it also expands "$var" and executes
    // "{${...}}" expressions. Never feed untrusted text to split_para().
    eval( "\$token = \"$token\";" );
    return $token;
}

/**
 * split_para() version 1.0 2008/08/19
 * Original page: http://www.ntu.beautifulworldco.com/weblog/?p=526
 *
 * Splits a parameter string into pieces on spaces, while keeping together
 * anything enclosed in a pair of " or ', and honouring \ as the escape
 * character for quotes.
 *
 * Example:
 *   -f "test 2" -ab --required "t\"est 1" --optional="te'st 3" --option -v 'test 4'
 * is split into:
 *   -f / test 2 / -ab / --required / t"est 1 / --optional=te'st 3 / --option / -v / test 4
 *
 * Changes compared to the previous revision:
 *  - string offsets use [] instead of {} (the {} form is a parse error on PHP 8);
 *  - the token buffer is initialised before being appended to;
 *  - a token equal to "0" is no longer dropped (only empty tokens are).
 *
 * @param string $pattern The parameter list to split.
 * @return array The list of tokens, in order.
 */
function split_para ( $pattern ) {
    $begin = 0;            // offset where the current token starts
    $backslash = 0;        // number of consecutive backslashes just seen
    $quote = "";           // quote character currently open, "" when none
    $quote_mark = array(); // offsets (relative to $begin) of grouping quotes to strip
    $result = array();
    $pattern = trim ( $pattern );
    $length = strlen ( $pattern );

    for ( $end = 0; $end < $length; $end++ ) {
        $char = $pattern[$end];
        if ( $char == "\\" ) {
            $backslash++;
            continue;
        }
        if ( $char == "\"" || $char == "'" ) {
            $other_quote = ( $char == "\"" ) ? "'" : "\"";
            // An escaped quote, or a quote inside the other kind of quotes, is plain text.
            if ( $backslash % 2 == 0 && $quote != $other_quote ) {
                $quote_mark[] = $end - $begin;
                $quote = ( $quote == "" ) ? $char : "";
            }
        } else if ( $char == " " && $quote == "" ) {
            // Unquoted space: the current token ends here.
            $token = _split_para_token( (string) substr( $pattern, $begin, $end - $begin ), $quote_mark );
            if ( $token !== "" ) {
                $result[] = $token;
            }
            $quote_mark = array();
            $begin = $end + 1;
        }
        $backslash = 0;
    }

    // Whatever remains after the last separator is the final token.
    $token = _split_para_token( (string) substr( $pattern, $begin, $length - $begin ), $quote_mark );
    if ( $token !== "" ) {
        $result[] = $token;
    }
    return $result;
}
/**
 * Removes the first element of an array and returns it together with its key.
 * Unlike array_shift(), the remaining keys are left untouched.
 *
 * @param array $arr Array to shift; modified in place.
 * @return array|false array($key, $value) of the removed element, or FALSE when $arr is empty.
 */
function kshift(&$arr) {
    if (count($arr) == 0) {
        return FALSE;
    }
    // Take a one-element, key-preserving slice to learn the first key/value pair.
    $head = array_slice($arr, 0, 1, TRUE);
    $first_key = key($head);
    $first_value = current($head);
    unset($arr[$first_key]);
    return array($first_key, $first_value);
}
/**
 * Shuffles an array in place while keeping each value attached to its key.
 *
 * @param array $array Array to shuffle; modified in place.
 * @return mixed The untouched argument when it is not an array; nothing otherwise.
 */
function kshuffle(&$array) {
    if (!is_array($array)) {
        return $array;
    }
    $order = array_keys($array);
    shuffle($order);
    $shuffled = array();
    for ($pos = 0, $total = count($order); $pos < $total; $pos++) {
        $shuffled[$order[$pos]] = $array[$order[$pos]];
    }
    $array = $shuffled;
}
/**
 * One "draft" rule of the dir_selection_algorithm setting: how many drives to
 * pick per draft, from which set of drives, and in which order
 * ('random' or 'most_available_space').
 */
class DirectorySelection {
    public $num_dirs_per_draft;
    public $selection_algorithm;
    public $drives;
    public $is_custom;
    public $sorted_target_drives;
    public $last_resort_sorted_target_drives;

    function __construct($num_dirs_per_draft, $selection_algorithm, $drives, $is_custom) {
        $this->num_dirs_per_draft = $num_dirs_per_draft;
        $this->selection_algorithm = $selection_algorithm;
        $this->drives = $drives;
        $this->is_custom = $is_custom;
    }

    /**
     * Orders the two candidate lists (in place, so the caller's arrays are
     * reordered too) and stores the candidates that belong to $this->drives.
     *
     * @param array $sorted_target_drives             drive => value map; shuffled or sorted by value (desc).
     * @param array $last_resort_sorted_target_drives Same, for the fallback candidates.
     */
    function init(&$sorted_target_drives, &$last_resort_sorted_target_drives) {
        // Shuffle or sort by available space (desc)
        switch ($this->selection_algorithm) {
            case 'random':
                kshuffle($sorted_target_drives);
                kshuffle($last_resort_sorted_target_drives);
                break;
            case 'most_available_space':
                arsort($sorted_target_drives);
                arsort($last_resort_sorted_target_drives);
                break;
        }

        // Only keep drives that are in $this->drives
        $this->sorted_target_drives = $this->keep_own_drives($sorted_target_drives);
        $this->last_resort_sorted_target_drives = $this->keep_own_drives($last_resort_sorted_target_drives);
    }

    /**
     * Returns the entries of $candidates whose key is listed in $this->drives, order preserved.
     */
    private function keep_own_drives($candidates) {
        $kept = array();
        foreach ($candidates as $drive => $value) {
            if (in_array($drive, $this->drives)) {
                $kept[$drive] = $value;
            }
        }
        return $kept;
    }

    /**
     * Takes up to num_dirs_per_draft drives off the stored lists: first from the
     * regular candidates, then, if those run out, from the last-resort ones.
     *
     * @return array array($drives, $drives_last_resort), both drive => value maps.
     */
    function draft() {
        $drives = array();
        $drives_last_resort = array();
        $picked = 0;

        while ($picked < $this->num_dirs_per_draft) {
            $pair = kshift($this->sorted_target_drives);
            if ($pair === FALSE) {
                break;
            }
            $drives[$pair[0]] = $pair[1];
            $picked++;
        }

        // Fill the remaining slots, if any, from the last-resort list.
        while ($picked < $this->num_dirs_per_draft) {
            $pair = kshift($this->last_resort_sorted_target_drives);
            if ($pair === FALSE) {
                break;
            }
            $drives_last_resort[$pair[0]] = $pair[1];
            $picked++;
        }

        return array($drives, $drives_last_resort);
    }

    /**
     * Builds the DirectorySelection list described by a dir_selection_algorithm value.
     *
     * @param string $config_string        'random', 'most_available_space', or
     *                                     'forced (<N>x <group>, ...) <algorithm>'.
     * @param array  $dir_selection_groups group name => list of drives.
     * @return array List of DirectorySelection objects.
     */
    static function parse($config_string, $dir_selection_groups) {
        if ($config_string == 'random' || $config_string == 'most_available_space') {
            // Simple form: one rule covering the whole storage pool.
            global $storage_pool_drives;
            return array(new DirectorySelection(count($storage_pool_drives), $config_string, $storage_pool_drives, FALSE));
        }

        if (!preg_match('/forced ?\((.+)\) ?(random|most_available_space)/i', $config_string, $regs)) {
            gh_log(CRITICAL, "Can't understand the dir_selection_algorithm value: $config_string");
        }
        $selection_algorithm = $regs[2];

        $ds = array();
        foreach (array_map('trim', explode(',', $regs[1])) as $group) {
            // "2x group" and "2 group" are both accepted.
            $parts = explode(' ', preg_replace('/^([0-9]+)x/', '\\1 ', $group));
            $num_dirs = trim($parts[0]);
            $group_name = trim($parts[1]);
            if ($num_dirs == 'all' || $num_dirs > count($dir_selection_groups[$group_name])) {
                $num_dirs = count($dir_selection_groups[$group_name]);
            }
            $ds[] = new DirectorySelection($num_dirs, $selection_algorithm, $dir_selection_groups[$group_name], TRUE);
        }
        return $ds;
    }

    function update() {
        // Make sure num_dirs_per_draft and drives have been set, in case storage_pool_drive lines appear after dir_selection_algorithm line(s) in the config file
        if ($this->is_custom) {
            return;
        }
        if ($this->selection_algorithm != 'random' && $this->selection_algorithm != 'most_available_space') {
            return;
        }
        global $storage_pool_drives;
        $this->num_dirs_per_draft = count($storage_pool_drives);
        $this->drives = $storage_pool_drives;
    }
}
/**
 * Tells whether $path contains the ".greyhole_uses_this" marker file.
 * The directory stored in the global $going_dir, if any, is never reported as owned.
 *
 * @param string $path Directory to check.
 * @return bool
 */
function is_greyhole_owned_dir($path) {
    global $going_dir;
    $is_going_dir = isset($going_dir) && $path == $going_dir;
    return $is_going_dir ? FALSE : file_exists("$path/.greyhole_uses_this");
}
/**
 * Is it OK for a drive to be gone?
 *
 * @param string $sp_drive Storage pool drive to look up.
 * @param bool   $refresh  TRUE to reload the global $gone_ok_drives list through
 *                         get_gone_ok_dirs() first; it is also loaded when not set yet.
 * @return bool TRUE when $sp_drive is a key of $gone_ok_drives.
 */
function gone_ok($sp_drive, $refresh=FALSE) {
    global $gone_ok_drives;
    $must_load = $refresh || !isset($gone_ok_drives);
    if ($must_load) {
        $gone_ok_drives = get_gone_ok_dirs();
    }
    return isset($gone_ok_drives[$sp_drive]);
}
/**
 * Loads the 'Gone-OK-Drives' setting into the global $gone_ok_drives and returns it.
 * When the setting does not exist yet, it is created as an empty list.
 *
 * @return mixed The unserialized setting value (an empty array when it had to be created).
 */
function get_gone_ok_dirs() {
    global $gone_ok_drives;
    $setting = Settings::get('Gone-OK-Drives');
    if (!$setting) {
        $gone_ok_drives = array();
        Settings::set('Gone-OK-Drives', $gone_ok_drives);
        return $gone_ok_drives;
    }
    $gone_ok_drives = unserialize($setting->value);
    return $gone_ok_drives;
}
/**
 * Adds a storage pool drive to, or removes it from, the 'Gone-OK-Drives' setting.
 *
 * @param string $sp_drive Drive path; when it is not found as given, it is retried
 *                         with exactly one leading slash and no trailing slash.
 * @param string $action   'add' to add the drive; any other value removes it.
 * @return bool FALSE when the drive is not part of $storage_pool_drives, TRUE otherwise.
 */
function mark_gone_ok($sp_drive, $action='add') {
    global $storage_pool_drives, $gone_ok_drives;

    if (!in_array($sp_drive, $storage_pool_drives)) {
        $sp_drive = '/' . trim($sp_drive, '/');
        if (!in_array($sp_drive, $storage_pool_drives)) {
            return FALSE;
        }
    }

    $gone_ok_drives = get_gone_ok_dirs();
    if ($action != 'add') {
        unset($gone_ok_drives[$sp_drive]);
    } else {
        $gone_ok_drives[$sp_drive] = TRUE;
    }
    Settings::set('Gone-OK-Drives', $gone_ok_drives);
    return TRUE;
}
/**
 * Tells whether $sp_drive is listed in the global $fscked_gone_drives.
 *
 * @param string $sp_drive Storage pool drive to look up.
 * @param bool   $refresh  TRUE to reload the list through get_fsck_gone_drives()
 *                         first; it is also loaded when not set yet.
 * @return bool TRUE when $sp_drive is a key of $fscked_gone_drives.
 */
function gone_fscked($sp_drive, $refresh=FALSE) {
    global $fscked_gone_drives;
    $must_load = $refresh || !isset($fscked_gone_drives);
    if ($must_load) {
        $fscked_gone_drives = get_fsck_gone_drives();
    }
    return isset($fscked_gone_drives[$sp_drive]);
}
/**
 * Loads the 'Gone-FSCKed-Drives' setting into the global $fscked_gone_drives and returns it.
 * When the setting does not exist yet, it is created as an empty list.
 *
 * @return mixed The unserialized setting value (an empty array when it had to be created).
 */
function get_fsck_gone_drives() {
    global $fscked_gone_drives;
    $setting = Settings::get('Gone-FSCKed-Drives');
    if (!$setting) {
        $fscked_gone_drives = array();
        Settings::set('Gone-FSCKed-Drives', $fscked_gone_drives);
        return $fscked_gone_drives;
    }
    $fscked_gone_drives = unserialize($setting->value);
    return $fscked_gone_drives;
}
/**
 * Adds a drive to, or removes it from, the 'Gone-FSCKed-Drives' setting.
 *
 * @param string $sp_drive Storage pool drive.
 * @param string $action   'add' to add the drive; any other value removes it.
 */
function mark_gone_drive_fscked($sp_drive, $action='add') {
    global $fscked_gone_drives;
    $fscked_gone_drives = get_fsck_gone_drives();
    if ($action != 'add') {
        unset($fscked_gone_drives[$sp_drive]);
    } else {
        $fscked_gone_drives[$sp_drive] = TRUE;
    }
    Settings::set('Gone-FSCKed-Drives', $fscked_gone_drives);
}
/**
 * Looks for storage pool drives that lost their ".greyhole_uses_this" marker, and
 * for previously missing drives that have it again; emails $email_to about both,
 * and (unless $skip_fsck is TRUE) runs a fsck of all shares and emails its report.
 *
 * @param bool $skip_fsck TRUE to only update the bookkeeping and send the
 *                        notification emails, without running any fsck.
 */
function check_storage_pool_drives($skip_fsck=FALSE) {
    global $storage_pool_drives, $email_to, $gone_ok_drives;

    $needs_fsck = FALSE; // FALSE: nothing to do; 1: a drive went missing; 2: a drive came back
    $returned_drives = array();
    $missing_drives = array();

    // $i / $j make the first gone_fscked() / gone_ok() call after a (re)start of
    // the counters refresh its list; later calls reuse the loaded list.
    $i = 0; $j = 0;
    foreach ($storage_pool_drives as $sp_drive) {
        if (!is_greyhole_owned_dir($sp_drive) && !gone_fscked($sp_drive, $i++ == 0) && !file_exists("$sp_drive/.greyhole_used_this")) {
            if ($needs_fsck !== 2) {
                $needs_fsck = 1;
            }
            mark_gone_drive_fscked($sp_drive);
            $missing_drives[] = $sp_drive;
            gh_log(WARN, "Warning! It seems $sp_drive is missing it's \".greyhole_uses_this\" file. This either means this drive is currently unmounted, or you forgot to create this file.");
            gh_log(DEBUG, "Email sent for gone drive: $sp_drive");
            $gone_ok_drives[$sp_drive] = TRUE; // The upcoming fsck should not recreate missing copies just yet
        } else if ((gone_ok($sp_drive, $j++ == 0) || gone_fscked($sp_drive, $i++ == 0)) && is_greyhole_owned_dir($sp_drive)) {
            // $sp_drive is now back
            $needs_fsck = 2;
            $returned_drives[] = $sp_drive;
            gh_log(DEBUG, "Email sent for revived drive: $sp_drive");
            mark_gone_ok($sp_drive, 'remove');
            mark_gone_drive_fscked($sp_drive, 'remove');
            $i = 0; $j = 0;
        }
    }

    // Notification about drives that came back.
    if (count($returned_drives) > 0) {
        $body = "This is an automated email from Greyhole.\n\nIt appears one or more of your storage pool drives came back:\n";
        foreach ($returned_drives as $returned_drive) {
            $body .= "$returned_drive was missing; it's now available again.\n";
        }
        if (!$skip_fsck) {
            $body .= "\nA fsck will now start, to fix the symlinks found in your shares, when possible.\nYou'll receive a report email once that fsck run completes.\n";
        }
        // The subject is capped at 255 characters.
        $subject = substr("Storage pool drives now online on " . exec ('hostname') . ": " . implode(",", $returned_drives), 0, 255);
        mail($email_to, $subject, $body);
    }

    // Notification about drives that went missing.
    if (count($missing_drives) > 0) {
        $body = "This is an automated email from Greyhole.\n\nIt appears one or more of your storage pool drives are missing their \".greyhole_uses_this\" file:\n";
        foreach ($missing_drives as $missing_drive) {
            $body .= "$missing_drive/.greyhole_uses_this: File not found\n";
        }
        // The example commands in the email use the first missing drive.
        $example_drive = $missing_drives[0];
        $body .= "\nThis either means these mount(s) are currently unmounted, or you forgot to create this file.\n\n";
        $body .= "Here are your options:\n\n";
        $body .= "- If you forgot to create this file, you should create it ASAP, as per the INSTALL instructions. Until you do, this drive will not be part of your storage pool.\n\n";
        $body .= "- If the drive(s) are gone, you should either re-mount them manually (if possible), or remove them from your storage pool. To do so, use the following command:\n greyhole --gone=".escapeshellarg($example_drive)."\n Note that the above command is REQUIRED for Greyhole to re-create missing file copies before the next fsck runs. Until either happens, missing file copies WILL NOT be re-created on other drives.\n\n";
        $body .= "- If you know these drive(s) will come back soon, and do NOT want Greyhole to re-create missing file copies for this drive until it reappears, you should execute this command:\n greyhole --wait-for=".escapeshellarg($example_drive)."\n\n";
        if (!$skip_fsck) {
            $body .= "A fsck will now start, to fix the symlinks found in your shares, when possible.\nYou'll receive a report email once that fsck run completes.\n";
        }
        $subject = substr("Missing storage pool drives on " . exec ('hostname') . ": " . implode(",", $missing_drives), 0, 255);
        mail($email_to, $subject, $body);
    }

    if ($needs_fsck === FALSE) {
        return;
    }

    set_metastore_backup();
    get_metastores(FALSE); // FALSE => Resets the metastores cache
    clearstatcache();

    if (!$skip_fsck) {
        global $shares_options;
        initialize_fsck_report('All shares');
        if ($needs_fsck !== 2) {
            gh_log(INFO, "Starting fsck for all shares - caused by missing drive. Will just recreate symlinks to existing copies when possible; won't create new copies just yet.");
        } else {
            // First check the metadata stores found on each drive that came back.
            foreach ($returned_drives as $drive) {
                $metastores = get_metastores_from_storage_volume($drive);
                gh_log(INFO, "Starting fsck for metadata store on $drive which came back online.");
                foreach ($metastores as $metastore) {
                    foreach ($shares_options as $share_name => $share_options) {
                        gh_fsck_metastore($metastore,"/$share_name", $share_name);
                    }
                }
                gh_log(INFO, "fsck for returning drive $drive's metadata store completed.");
            }
            gh_log(INFO, "Starting fsck for all shares - caused by missing drive that came back online.");
        }
        foreach ($shares_options as $share_name => $share_options) {
            gh_fsck($share_options['landing_zone'], $share_name);
        }
        gh_log(INFO, "fsck for all shares completed.");
        $fsck_report = get_fsck_report();
        gh_log(DEBUG, "Sending fsck report to $email_to");
        mail($email_to, 'fsck of Greyhole shares on ' . exec('hostname'), $fsck_report);
    }

    // Refresh $gone_ok_drives to it's real value (from the DB)
    get_gone_ok_dirs();
}
/**
 * A fsck log file (fsck_checksums.log or fsck_files.log) that gets emailed to
 * $email_to whenever it changed since the last time it was sent.
 */
class FSCKLogFile {
    const PATH = '/usr/share/greyhole';

    private $path;
    private $filename;
    private $lastEmailSentTime = 0; // 0 means "not loaded from the settings yet"

    /**
     * @param string $filename Name of the log file.
     * @param string $path     Directory that contains it.
     */
    public function __construct($filename, $path=self::PATH) {
        $this->path = $path;
        $this->filename = $filename;
    }

    /**
     * Emails the log file if it exists and was modified after the last email,
     * then records its modification time as the new "last email" time.
     */
    public function emailAsRequired() {
        $logfile = "$this->path/$this->filename";
        if (!file_exists($logfile)) {
            return;
        }
        $last_mod_date = filemtime($logfile);
        if (!($last_mod_date > $this->getLastEmailSentTime())) {
            return;
        }
        global $email_to;
        gh_log(WARN, "Sending $logfile by email to $email_to");
        mail($email_to, $this->getSubject(), $this->getBody());
        $this->lastEmailSentTime = $last_mod_date;
        Settings::set("last_email_$this->filename", $this->lastEmailSentTime);
    }

    /**
     * Builds the email body. For fsck_files.log, the file is loaded into the
     * global $fsck_report and deleted before the report is rendered.
     */
    private function getBody() {
        $logfile = "$this->path/$this->filename";
        switch ($this->filename) {
            case 'fsck_checksums.log':
                return file_get_contents($logfile) . "\nNote: You should manually delete the $logfile file once you're done with it.";
            case 'fsck_files.log':
                global $fsck_report;
                $fsck_report = unserialize(file_get_contents($logfile));
                unlink($logfile);
                return get_fsck_report() . "\nNote: This report is a complement to the last report you've received. It details possible errors with files for which the fsck was postponed.";
            default:
                return '[empty]';
        }
    }

    /** Builds the email subject that matches the log file's name. */
    private function getSubject() {
        switch ($this->filename) {
            case 'fsck_checksums.log':
                return 'Mismatched checksums in Greyhole file copies';
            case 'fsck_files.log':
                return 'fsck_files of Greyhole shares on ' . exec('hostname');
            default:
                return 'Unknown FSCK report';
        }
    }

    /** Returns the time of the last email, reading it from the settings while it is still 0. */
    private function getLastEmailSentTime() {
        if ($this->lastEmailSentTime != 0) {
            return $this->lastEmailSentTime;
        }
        $setting = Settings::get("last_email_$this->filename");
        if ($setting) {
            $this->lastEmailSentTime = (int) $setting->value;
        }
        return $this->lastEmailSentTime;
    }

    /**
     * Loads the global $fsck_report from fsck_files.log when that file exists;
     * otherwise starts a new report for $what.
     */
    public static function loadFSCKReport($what) {
        $logfile = self::PATH . '/fsck_files.log';
        if (!file_exists($logfile)) {
            initialize_fsck_report($what);
            return;
        }
        global $fsck_report;
        $fsck_report = unserialize(file_get_contents($logfile));
    }

    /** Serializes the global $fsck_report into fsck_files.log. */
    public static function saveFSCKReport() {
        global $fsck_report;
        file_put_contents(self::PATH . '/fsck_files.log', serialize($fsck_report));
    }
}
/**
 * Access to the key/value pairs stored in the `settings` DB table.
 *
 * All names and values are passed through db_escape_string() before being put
 * into a query, so values that contain quotes (e.g. serialized drive paths) can
 * no longer break, or alter, the SQL statements.
 */
class Settings {
    /**
     * Fetches one setting.
     *
     * @param string       $name  Setting name; matched with LIKE, so % and _ act as wildcards.
     * @param string|false $value When not FALSE, the value must also match (LIKE).
     * @return object|false What db_fetch_object() returns for the first matching row.
     */
    public static function get($name, $value=FALSE) {
        $query = sprintf("SELECT * FROM settings WHERE name LIKE '%s'", db_escape_string((string) $name));
        if ($value !== FALSE) {
            $query .= sprintf(" AND value LIKE '%s'", db_escape_string((string) $value));
        }
        $result = db_query($query) or gh_log(CRITICAL, "Can't select setting '$name'/'$value' from settings table: " . db_error());
        return db_fetch_object($result);
    }

    /**
     * Creates or replaces one setting.
     *
     * @param string $name  Setting name.
     * @param mixed  $value Setting value; arrays are stored serialized.
     * @return object Object with the `name` and (possibly serialized) `value` that were stored.
     */
    public static function set($name, $value) {
        if (is_array($value)) {
            $value = serialize($value);
        }
        $escaped_name = db_escape_string((string) $name);
        $escaped_value = db_escape_string((string) $value);
        global $db_use_mysql;
        if (@$db_use_mysql) {
            $query = sprintf("INSERT INTO settings (name, value) VALUES ('%s', '%s') ON DUPLICATE KEY UPDATE value = VALUES(value)", $escaped_name, $escaped_value);
            db_query($query) or gh_log(CRITICAL, "Can't insert/update '$name' setting: " . db_error());
        } else {
            // No upsert statement is used for the other engine: delete, then insert.
            $query = sprintf("DELETE FROM settings WHERE name = '%s'", $escaped_name);
            db_query($query) or gh_log(CRITICAL, "Can't delete '$name' setting: " . db_error());
            $query = sprintf("INSERT INTO settings (name, value) VALUES ('%s', '%s')", $escaped_name, $escaped_value);
            db_query($query) or gh_log(CRITICAL, "Can't insert '$name' setting: " . db_error());
        }
        return (object) array('name' => $name, 'value' => $value);
    }

    /**
     * Renames a setting.
     *
     * @param string $from Current name.
     * @param string $to   New name.
     */
    public static function rename($from, $to) {
        $query = sprintf("UPDATE settings SET name = '%s' WHERE name = '%s'", db_escape_string((string) $to), db_escape_string((string) $from));
        db_query($query) or gh_log(CRITICAL, "Can't rename setting '$from' to '$to': " . db_error());
    }

    /**
     * Writes a serialized copy of all settings into ".gh_settings.bak" on every
     * storage pool drive that is_greyhole_owned_dir() accepts.
     */
    public static function backup() {
        global $storage_pool_drives;
        $result = db_query("SELECT * FROM settings") or gh_log(CRITICAL, "Can't select settings for backup: " . db_error());
        $settings = array();
        while ($setting = db_fetch_object($result)) {
            $settings[] = $setting;
        }
        foreach ($storage_pool_drives as $sp_drive) {
            if (is_greyhole_owned_dir($sp_drive)) {
                $settings_backup_file = "$sp_drive/.gh_settings.bak";
                file_put_contents($settings_backup_file, serialize($settings));
            }
        }
    }

    /**
     * Restores the settings from the most recently modified ".gh_settings.bak"
     * found on the storage pool drives.
     *
     * @return bool TRUE when a backup was found and applied, FALSE otherwise.
     */
    public static function restore() {
        global $storage_pool_drives;
        // Must be initialised once, outside the loop: resetting it for every drive
        // made the last backup found win, regardless of its age.
        $latest_backup_time = 0;
        foreach ($storage_pool_drives as $sp_drive) {
            $settings_backup_file = "$sp_drive/.gh_settings.bak";
            if (file_exists($settings_backup_file)) {
                $last_mod_date = filemtime($settings_backup_file);
                if ($last_mod_date > $latest_backup_time) {
                    $backup_file = $settings_backup_file;
                    $latest_backup_time = $last_mod_date;
                }
            }
        }
        if (isset($backup_file)) {
            gh_log(INFO, "Restoring settings from last backup: $backup_file");
            // NOTE(review): unserialize() trusts the content of the backup file.
            $settings = unserialize(file_get_contents($backup_file));
            foreach ($settings as $setting) {
                Settings::set($setting->name, $setting->value);
            }
            return TRUE;
        }
        return FALSE;
    }
}
// Work out which action was requested; print_usage() is called for an unknown one.
list($action, $options) = process_command_line();
if ($action == 'unknown') {
    print_usage();
}

// Any forking needs to happen before db_connect, or the parent exiting will close the child's DB connection!
if ($action == 'md5-worker') {
    $pid = pcntl_fork();
    if ($pid == -1) {
        die("Error spawning child md5-worker!");
    }
    if ($pid != 0) {
        // Parent: print the child's PID and stop here. Only the child carries on below.
        echo $pid;
        exit(0);
    }
}

process_config();
db_connect() or gh_log(CRITICAL, "Can't connect to $db_options->engine database.");
db_migrate();

// Every action, except the ones listed here, has to be executed as root.
if (!in_array($action, array('stats', 'view-queue', 'debug', 'iostat', 'status', 'getuid', 'logs'))) {
    if (exec("whoami") != 'root') {
        echo "You need to execute this as root.\n";
        exit(1);
    }
}
// 'getuid' action: output the value returned by set_uniq_id(), then stop.
if ('getuid' == $action) {
    $uniq_id = set_uniq_id();
    print $uniq_id;
    exit(0);
}
// 'logs' action: follow the Greyhole log until interrupted.
if ($action == 'logs') {
    if (strtolower($greyhole_log_file) != 'syslog') {
        // Logging to a dedicated file: follow that file.
        passthru("tail -f " . escapeshellarg($greyhole_log_file));
    } else if (gh_is_file('/var/log/syslog')) {
        passthru("tail -f /var/log/syslog | grep --line-buffered Greyhole");
    } else {
        // No /var/log/syslog: fall back to /var/log/messages.
        passthru("tail -f /var/log/messages | grep --line-buffered Greyhole");
    }
    exit(0);
}
// 'iostat' action: print, over and over, the combined read+write throughput of each storage pool drive.
if ($action == 'iostat') {
    // Map each block device (as reported by df, partition number stripped for sdX devices) to its drive.
    $devices_drives = array();
    foreach ($storage_pool_drives as $sp_drive) {
        $device = preg_replace('@/dev/(sd[a-z])[0-9]+@', '\1', exec("df " . escapeshellarg($sp_drive) . " 2>/dev/null | awk '{print \$1}'"));
        $devices_drives[$device] = $sp_drive;
    }

    for (;;) {
        // exec() appends to its output array, so start from an empty one on each pass.
        $result = array();
        exec("iostat -p ALL -k 10 2 | grep '^sd[a-z] ' | awk '{print \$1,\$3,\$4}'", $result);

        $iostat = array();
        foreach ($result as $line) {
            list($device, $read_kBps, $write_kBps) = explode(' ', $line);
            if (isset($devices_drives[$device])) {
                $drive = $devices_drives[$device];
                $iostat[$drive] = (int) round($read_kBps + $write_kBps);
            }
            # Devices that aren't in the storage pool are ignored.
        }

        #ksort($iostat); // Let keep the order in which the drives were mounted
        foreach ($iostat as $dir => $io_kBps) {
            printf("$dir: %7s kBps\n", $io_kBps);
        }
        echo "---\n";
    }
    exit(0);
}
// 'thaw' action: mark the frozen tasks of a frozen directory as thawed.
if ($action == 'thaw') {
    // Directory not found as typed: retry with one leading slash and no trailing slash.
    if (isset($options['dir']) && !in_array($options['dir'], $frozen_directories)) {
        $options['dir'] = '/' . trim($options['dir'], '/');
    }

    // No directory given, or still not a frozen one: list the valid choices and stop.
    if (!isset($options['dir']) || !in_array($options['dir'], $frozen_directories)) {
        echo "Frozen directories:\n";
        foreach ($frozen_directories as $frozen_directory) {
            echo " $frozen_directory\n";
        }
        echo "To thaw any of the above directories, use the following command:\n" . "greyhole --thaw=directory\n";
        exit(0);
    }

    // The first path component is used as the share, the rest as the path prefix inside it.
    $path = explode('/', $options['dir']);
    $share = array_shift($path);
    $query = sprintf(
        "UPDATE tasks SET complete = 'thawed' WHERE complete = 'frozen' AND share = '%s' AND full_path LIKE '%s%%'",
        db_escape_string($share),
        db_escape_string(implode('/', $path))
    );
    db_query($query) or die("Can't thaw tasks with query: $query - Error: " . db_error());

    echo $options['dir'] . " directory has been thawed.\n";
    echo "All pasts file operations that occured in this directory will now be processed by Greyhole.\n";
    exit(0);
}
// 'gone' / 'going' actions: remove a drive from the storage pool.
// 'going' first moves/relinks what it can while the drive is still readable;
// 'gone' only removes the drive from the configuration and bookkeeping.
if ($action == 'gone' || $action == 'going') {
    global $storage_pool_drives;

    // Drive not found as typed: retry with one leading slash and no trailing slash.
    if (array_search($options['dir'], $storage_pool_drives) === FALSE) {
        $options['dir'] = '/' . trim($options['dir'], '/');
    }
    if (array_search($options['dir'], $storage_pool_drives) === FALSE) {
        if (!empty($options['dir'])) {
            echo "Directory " . $options['dir'] . " is not one of your defined storage pool drives.\n";
        }
        echo "Please use one of the following with the --$action option:\n ";
        echo implode("\n ", $storage_pool_drives) . "\n";
        echo "Note that the correct syntax for this command is:\n";
        echo " greyhole --$action=<drive>\n";
        echo "The '=' character is mandatory.\n";
        exit(1);
    }

    if ($action == 'going') {
        set_metastore_backup();
        gh_log(INFO, "Storage pool drive " . $options['dir'] . " will be removed from the storage pool.");
        echo("Storage pool drive " . $options['dir'] . " will be removed from the storage pool.\n");

        // global $going_dir; // Used in function is_greyhole_owned_dir()
        $going_dir = $options['dir'];

        // Renaming that file will insure this drive won't be used while we're moving files away
        gh_rename("$going_dir/.greyhole_uses_this", "$going_dir/.greyhole_used_this");

        // fsck shares with only 1 file copy to remove those from $options['dir']
        initialize_fsck_report('Shares with only 1 copy');
        foreach ($shares_options as $share_name => $share_options) {
            if ($share_options['num_copies'] == 1) {
                echo "Moving file copies for share '$share_name'... Please be patient... ";
                gh_fsck($share_options['landing_zone'], $share_name);
                echo "Done.\n";
            } else {
                echo "Looking for soon-to-be-broken symbolic links in the share '$share_name'... Please be patient... ";
                chdir($share_options['landing_zone']);
                // exec() APPENDS to its output array: without this reset, the links found
                // in previously handled shares would be processed again for this share.
                $result = array();
                exec("find . -type l -lname " . escapeshellarg("$going_dir/*"), $result);
                foreach ($result as $file_to_relink) {
                    if (is_link($file_to_relink)) {
                        $file_to_relink = substr($file_to_relink, 2); // strip the leading "./" printed by find
                        // Point the link to the first copy found on a drive that stays in the pool.
                        foreach ($storage_pool_drives as $sp_drive) {
                            if (!is_greyhole_owned_dir($sp_drive)) { continue; }
                            $new_link_target = clean_dir("$sp_drive/$share_name/$file_to_relink");
                            if (gh_is_file($new_link_target)) {
                                unlink($file_to_relink);
                                symlink($new_link_target, $file_to_relink);
                                break;
                            }
                        }
                    }
                }
                echo "Done.\n";
            }
        }
    }

    // Remove $going_dir from config file and restart (if it was running)
    // NOTE(review): the sed patterns blank every config line that contains the drive path,
    // so a drive whose path merely starts with the same text would match as well.
    $escaped_dir = str_replace('/', '\/', $options['dir']);
    exec("/bin/sed -i 's/^.*storage_pool_directory.*$escaped_dir.*$//' /etc/greyhole.conf"); // Deprecated notation
    exec("/bin/sed -i 's/^.*storage_pool_drive.*$escaped_dir.*$//' /etc/greyhole.conf");
    if (is_file('/sbin/restart')) {
        $running = (int) exec('/sbin/status greyhole | grep running | wc -l');
        if ($running) {
            exec("/sbin/restart greyhole");
        }
    } else {
        exec("/sbin/service greyhole condrestart");
    }

    // For Amahi users
    if (file_exists('/usr/bin/hdactl')) {
        echo "You should de-select this partition in your Amahi dashboard (http://hda), in the Shares > Storage Pool page.\n";
    }

    mark_gone_ok($options['dir'], 'remove');
    mark_gone_drive_fscked($options['dir'], 'remove');
    gh_log(INFO, "Storage pool drive " . $options['dir'] . " has been removed.");
    echo "Storage pool drive " . $options['dir'] . " has been removed from your pool, which means the missing file copies that are in this drive will be re-created during the next fsck.\n";

    if ($action == 'going') {
        // Schedule fsck for all shares to re-create missing copies on other shares
        db_query("INSERT INTO tasks (action, share, additional_info, complete) VALUES ('fsck', '', NULL, 'yes')") or gh_log(CRITICAL, "Can't insert fsck task: " . db_error());
        echo "All the files that were only on $going_dir have been copied somewhere else.\n";
        echo "A fsck of all shares has been scheduled, to recreate other file copies. It will start after all currently pending tasks have been completed.\n";
    } else { // $action == 'gone'
        echo "Sadly, file copies that were only on this drive, if any, are now lost!\n";
    }
    exit(0);
}
// --debug: show what is known about a file: its completed tasks in the DB,
// the matching log lines, and what currently exists on disk for it.
if ($action == 'debug') {
    if (!isset($options['debug_filename'])) {
        print_usage();
    }

    $filename = $options['debug_filename'];
    if (mb_strpos($filename, '/') === FALSE) {
        $filename = "/$filename";
    }

    echo "Debugging file operations for file named \"$filename\"\n";
    echo "\nFrom DB\n=======\n";

    // Completed tasks whose path contains the requested name
    $debug_tasks = array();
    $query = sprintf("SELECT id, action, share, full_path, additional_info, event_date FROM tasks_completed WHERE full_path LIKE '%%%s%%' ORDER BY id ASC",
        db_escape_string($filename)
    );
    $result = db_query($query) or die("Can't query tasks_completed with query: $query - Error: " . db_error());
    while ($row = db_fetch_object($result)) {
        $debug_tasks[$row->id] = $row;
    }

    // Renames
    $query = sprintf("SELECT id, action, share, full_path, additional_info, event_date FROM tasks_completed WHERE additional_info LIKE '%%%s%%' ORDER BY id ASC",
        db_escape_string($filename)
    );
    while (TRUE) {
        $result = db_query($query) or die("Can't query tasks_completed for renames with query: $query - Error: " . db_error());
        while ($row = db_fetch_object($result)) {
            $debug_tasks[$row->id] = $row;
            // Next pass would look for tasks whose target is this task's source path
            $query = sprintf("SELECT id, action, share, full_path, additional_info, event_date FROM tasks_completed WHERE additional_info = '%s' ORDER BY id ASC",
                db_escape_string($row->full_path)
            );
        }

        # Is there more?
        // NOTE(review): a COUNT(*) query yields a row even when the count is 0, so this
        // presumably always breaks after the first pass - confirm whether that is intended.
        $new_query = preg_replace('/SELECT .* FROM/i', 'SELECT COUNT(*) FROM', $query);
        $result = db_query($new_query) or die("Can't query tasks_completed for COUNT of renames with query: $new_query - Error: " . db_error());
        if (db_fetch_object($result) !== FALSE) {
            break;
        }
    }

    ksort($debug_tasks);
    $to_grep = array();
    foreach ($debug_tasks as $task) {
        echo " [$task->event_date] Task ID $task->id: $task->action $task->share/$task->full_path" . ($task->action == 'rename' ? " -> $task->share/$task->additional_info" : '') . "\n";
        $to_grep["$task->share/$task->full_path"] = 1;
        if ($task->action == 'rename') {
            $to_grep["$task->share/$task->additional_info"] = 1;
        }
    }

    if (empty($to_grep)) {
        // Nothing in the DB: fall back to the name given on the command line,
        // split into "share" (first path component) and the remaining path.
        $to_grep[$filename] = 1;
        if (mb_strpos($filename, '/') !== FALSE) {
            $share = trim(mb_substr($filename, 0, mb_strpos(mb_substr($filename, 1), '/')+1), '/');
            $full_path = trim(mb_substr($filename, mb_strpos(mb_substr($filename, 1), '/')+1), '/');
            $debug_tasks[] = (object) array('share' => $share, 'full_path' => $full_path);
        }
    }

    echo "\nFrom logs\n=========\n";
    $to_grep = array_keys($to_grep);
    $to_grep = implode("|", $to_grep);

    $commands = array();
    // The log path is quoted for the shell; the *.gz glob stays outside the quotes so it still expands.
    $commands[] = "zgrep -E -B 1 -A 1 -h " . escapeshellarg($to_grep) . " " . escapeshellarg($greyhole_log_file) . "*.gz";
    $commands[] = "grep -E -B 1 -A 1 -h " . escapeshellarg($to_grep) . " " . escapeshellarg($greyhole_log_file);

    // Collect output in a dedicated array instead of reusing the DB result variable.
    $log_lines = array();
    foreach ($commands as $command) {
        exec($command, $log_lines);
    }

    // The log path is data, not a pattern: quote it before embedding it in the regex.
    $log_prefix_pattern = '@^' . preg_quote($greyhole_log_file, '@') . '[0-9\.gz-]*:@';
    foreach ($log_lines as $rline) {
        echo preg_replace($log_prefix_pattern, '', $rline) . "\n";
    }

    echo "\nFrom filesystem\n===============\n";

    $last_task = array_pop($debug_tasks);
    $share = $last_task->share;
    // The fallback task built above has no 'action' property, so test for it before reading it.
    if (isset($last_task->action) && $last_task->action == 'rename') {
        $full_path = $last_task->additional_info;
    } else {
        $full_path = $last_task->full_path;
    }
    list($path, $filename) = explode_full_path($full_path);

    echo "Landing Zone:\n";
    echo " "; passthru("ls -l " . escapeshellarg(get_share_landing_zone($share) . "/" . $full_path));

    echo "\nMetadata Store:\n";
    foreach ($storage_pool_drives as $sp_drive) {
        $metastore = clean_dir("$sp_drive/.gh_metastore");
        if (file_exists("$metastore/$share/$full_path")) {
            echo " "; passthru("ls -l " . escapeshellarg("$metastore/$share/$full_path"));
            // The metadata file holds serialized PHP data; dump it in readable form.
            $data = var_export(unserialize(file_get_contents("$metastore/$share/$full_path")), TRUE);
            $data = str_replace("\n", "\n ", $data);
            echo " $data\n";
        }
    }

    echo "\nFile copies:\n";
    foreach ($storage_pool_drives as $sp_drive) {
        if (file_exists("$sp_drive/$share/$full_path")) {
            echo " "; passthru("ls -l " . escapeshellarg("$sp_drive/$share/$full_path"));
        }
    }
    exit(0);
}
// --empty-trash: delete the .gh_trash folder of every storage pool drive,
// then clear the content of the trash share's landing zone, if one is defined.
if ($action == 'empty-trash') {
    foreach ($storage_pool_drives as $sp_drive) {
        $trash_path = clean_dir("$sp_drive/.gh_trash");

        if (file_exists($trash_path)) {
            // Size in KB, as reported by du
            $du_command = "du -sk " . escapeshellarg($trash_path) . " | awk '{print $1}'";
            $trash_size = trim(exec($du_command));
            echo "Trash in $sp_drive is " . bytes_to_human($trash_size*1024, FALSE) . ". Emptying... ";
            exec("rm -rf " . escapeshellarg($trash_path));
            echo "Done\n";
            continue;
        }

        echo "Trash in $sp_drive is empty. Nothing to do.\n";
    }

    // Only wipe the trash share when its (quoted) landing zone path is longer than 8 characters.
    $has_trash_share = isset($trash_share) && mb_strlen(escapeshellarg($trash_share['landing_zone'])) > 8;
    if ($has_trash_share) {
        exec("rm -rf " . escapeshellarg($trash_share['landing_zone']) . '/*');
    }
    exit(0);
}
// --view-queue: report, per share and in total, how many write / delete / rename / fsck
// tasks are pending in the tasks table, plus the number of entries in the spool folder.
// Output is either JSON (--json) or a text table.
if ($action == 'view-queue') {
    $shares_names = array_keys($shares_options);
    natcasesort($shares_names);

    // Width of the first column: longest share name, at least 7 characters
    $max_share_strlen = 7;
    foreach ($shares_names as $share_name) {
        if (mb_strlen($share_name) > $max_share_strlen) {
            $max_share_strlen = mb_strlen($share_name);
        }
    }

    // One COUNT query per counter; each entry is array(WHERE clause template, label used in the error message).
    $pending_counters = array(
        'num_writes_pending' => array("action = 'write' AND share = '%s' AND complete IN ('yes', 'thawed')", 'writes'),
        'num_delete_pending' => array("(action = 'unlink' OR action = 'rmdir') AND share = '%s' AND complete IN ('yes', 'thawed')", 'deletes'),
        'num_rename_pending' => array("action = 'rename' AND share = '%s' AND complete IN ('yes', 'thawed')", 'renames'),
        'num_fsck_pending' => array("(action = 'fsck' OR action = 'fsck_file' OR action = 'md5') AND share = '%s'", 'fsck'),
    );
    $totals = array(
        'num_writes_pending' => 0,
        'num_delete_pending' => 0,
        'num_rename_pending' => 0,
        'num_fsck_pending' => 0,
    );

    $queues = array();
    foreach ($shares_names as $share_name) {
        $counts = array();
        foreach ($pending_counters as $counter => $spec) {
            list($where, $what) = $spec;
            $query = sprintf("SELECT COUNT(*) AS num FROM tasks WHERE " . $where, db_escape_string($share_name));
            $result = db_query($query) or die("Can't find # of $what in tasks table: " . db_error());
            $row = db_fetch_object($result);
            $counts[$counter] = (int) $row->num;
            $totals[$counter] += $counts[$counter];
        }
        $queues[$share_name] = (object) $counts;
    }
    $queues['Total'] = (object) $totals;
    $queues['Spooled'] = (int) exec("ls -1 /var/spool/greyhole | wc -l");

    if (isset($options['json'])) {
        echo json_encode($queues);
    } else {
        echo "\nGreyhole Work Queue Statistics\n==============================\n\n";
        echo "This table gives you the number of pending operations queued for the Greyhole daemon, per share.\n\n";

        // Numeric columns are as wide as the largest formatted total, at least 7 characters
        $col_size = 7;
        foreach ($queues['Total'] as $type => $num) {
            $num = number_format($num, 0);
            if (strlen($num) > $col_size) {
                $col_size = strlen($num);
            }
        }
        $col_format = '%' . $col_size . 's';

        printf("%$max_share_strlen"."s $col_format $col_format $col_format $col_format\n", '', 'Write', 'Delete', 'Rename', 'Check');
        foreach ($queues as $share_name => $queue) {
            // Numeric share names become integer array keys; compare as strings, strictly,
            // so that e.g. a share named "0" is not mistaken for the 'Spooled'/'Total' rows.
            $row_label = (string) $share_name;
            if ($row_label === 'Spooled') continue;
            if ($row_label === 'Total') {
                echo str_repeat("=", $max_share_strlen+2+(4*$col_size)+(3*2));
                echo "\n";
            }
            echo sprintf("%-$max_share_strlen"."s", $share_name) . " ";
            echo sprintf($col_format, number_format($queue->num_writes_pending, 0)) . " ";
            echo sprintf($col_format, number_format($queue->num_delete_pending, 0)) . " ";
            echo sprintf($col_format, number_format($queue->num_rename_pending, 0)) . " ";
            echo sprintf($col_format, number_format($queue->num_fsck_pending, 0)) . "\n";
        }
        printf("%$max_share_strlen"."s $col_format $col_format $col_format $col_format\n", '', 'Write', 'Delete', 'Rename', 'Check');

        echo "\nThe following is the number of pending operations that the Greyhole daemon still needs to parse.\n";
        echo "Until it does, the nature of those operations is unknown.\n";
        echo "Spooled operations that have been parsed will be listed above and disappear from the count below.\n";
        echo sprintf("\n%-$max_share_strlen"."s ", 'Spooled');
        echo $queues['Spooled'] . "\n";
        echo "\n";
    }
    exit(0);
}
// --status: tell whether the daemon is running, which task is next in the queue,
// and show the last lines of the log along with the last logged action.
// Exits with 1 when no daemon process is found, 0 otherwise.
if ($action == 'status') {
    $num_dproc = (int) exec('ps ax | grep "^.*greyhole --daemon" | grep -v grep | wc -l');
    if ($num_dproc == 0) {
        echo "\nGreyhole daemon is currently stopped.\n\n";
        exit(1);
    }

    $task = get_next_task($temp_rs, TRUE);
    if ($task === FALSE) {
        echo "\nCurrently idle.\n";
    } else {
        echo "\nCurrently working on task ID $task->id: $task->action " . clean_dir("$task->share/$task->full_path") . ($task->action == 'rename' ? " -> " . clean_dir("$task->share/$task->additional_info") : '') . "\n";
    }

    $last_log_lines = array();
    exec("tail -10 " . escapeshellarg($greyhole_log_file), $last_log_lines);
    echo "\nRecent log entries:\n";
    echo " " . implode("\n ", $last_log_lines) . "\n";

    // The log can be empty (or missing): only parse the last line when there is one.
    if (!empty($last_log_lines)) {
        $last_log_line = $last_log_lines[count($last_log_lines)-1];
        // The first 15 characters of the line are parsed as the timestamp; the rest starts at offset 16
        $last_action_time = strtotime(mb_substr($last_log_line, 0, 15));
        $raw_last_log_line = mb_substr($last_log_line, 16);
        $last_log_line = explode(' ', $raw_last_log_line);
        // The action is the second space-separated word, minus any colon; it is absent on short lines
        $last_action = isset($last_log_line[1]) ? str_replace(':', '', $last_log_line[1]) : '';
        echo "\nLast logged action: $last_action\n";
        if ($last_action_time !== FALSE) {
            // Skip the date when the line did not start with a parsable timestamp, instead of printing 1970
            echo " on " . date('Y-m-d H:i:s', $last_action_time) . " (" . how_long_ago($last_action_time) . ")\n";
        }
    }
    echo "\n";
    exit(0);
}
// --stats: report total / used / free / trash / potentially available space (in KB)
// for each storage pool drive and for the pool as a whole.
// Output is either JSON (--json) or a text table (values shown in GB).
if ($action == 'stats') {
    // Width of the first column: longest drive path, plus one for the trailing colon
    $max_drive_strlen = 0;
    foreach ($storage_pool_drives as $sp_drive) {
        if (mb_strlen($sp_drive) > $max_drive_strlen) {
            $max_drive_strlen = mb_strlen($sp_drive);
        }
    }
    $max_drive_strlen++;

    $totals = array(
        'total_space' => 0,
        'used_space' => 0,
        'free_space' => 0,
        'trash_size' => 0,
        'potential_available_space' => 0
    );

    $dfs = get_free_space_in_storage_pool_drives();

    $stats = array();
    foreach ($storage_pool_drives as $sp_drive) {
        if (!isset($dfs[$sp_drive])) {
            // No free-space information for this drive: record an empty entry (displayed as Offline)
            $stats[$sp_drive] = (object) array();
            continue;
        }

        $df_command = "df -k " . escapeshellarg($sp_drive) . " | tail -1";
        $responses = array();
        exec($df_command, $responses);

        $total_space = 0;
        $used_space = 0;
        if (isset($responses[0])) {
            // Pick the first two numeric columns that precede "<number> <percent>% <mount point>"
            if (preg_match("@\s+([0-9]+)\s+([0-9]+)\s+[0-9]+\s+[0-9]+%\s+.+$@", $responses[0], $regs)) {
                $total_space = (float) $regs[1];
                $used_space = (float) $regs[2];
            }
        }
        $free_space = (float) $dfs[$sp_drive];

        $trash_path = clean_dir("$sp_drive/.gh_trash");
        if (!file_exists($trash_path)) {
            $trash_size = (float) 0;
        } else {
            $trash_size = (float) trim(exec("du -sk " . escapeshellarg($trash_path) . " | awk '{print $1}'"));
        }

        $potential_available_space = (float) $free_space + $trash_size;

        $stats[$sp_drive] = (object) array(
            'total_space' => $total_space,
            'used_space' => $used_space,
            'free_space' => $free_space,
            'trash_size' => $trash_size,
            'potential_available_space' => $potential_available_space,
        );

        $totals['total_space'] += $total_space;
        $totals['used_space'] += $used_space;
        $totals['free_space'] += $free_space;
        $totals['trash_size'] += $trash_size;
        $totals['potential_available_space'] += $potential_available_space;
    }
    $stats['Total'] = (object) $totals;

    if (isset($options['json'])) {
        echo json_encode($stats);
    } else {
        echo "\nGreyhole Statistics\n===================\n\n";
        echo "Storage Pool\n";
        printf("%$max_drive_strlen"."s Total - Used = Free + Trash = Possible\n", '');
        foreach ($stats as $sp_drive => $stat) {
            if ($sp_drive == 'Total') printf(" %-$max_drive_strlen"."s ==========================================\n", "");
            printf(" %-$max_drive_strlen"."s ", "$sp_drive:");
            // empty() is never TRUE for an object, so test the property itself: it is missing for
            // drives without free-space information, and 0 (int or float) when df gave nothing usable.
            if (!isset($stat->total_space) || $stat->total_space == 0) {
                echo " Offline \n";
            } else {
                // Values are in KB; show them in GB
                printf("%5.0fG - %5.0fG = %5.0fG + %5.0fG = %5.0fG\n",
                    $stat->total_space/1024/1024,
                    $stat->used_space/1024/1024,
                    $stat->free_space/1024/1024,
                    $stat->trash_size/1024/1024,
                    $stat->potential_available_space/1024/1024
                );
            }
        }
        echo "\n";
    }
    exit(0);
}
// --wait-for: mark a storage pool drive as Temporarily-Gone.
// Exits with 1 (after listing the valid drives) when the drive can't be marked.
if ($action == 'wait-for') {
    if (mark_gone_ok($options['dir'])) {
        gh_log(INFO, "Storage pool drive " . $options['dir'] . " has been marked Temporarily-Gone");
        echo "Storage pool drive " . $options['dir'] . " has been marked Temporarily-Gone, which means the missing file copies that are in this drive will not be re-created until it reappears.\n";
        exit(0);
    }

    // Marking failed: explain why (when a drive was given), and list the accepted values.
    if (!empty($options['dir'])) {
        echo "Drive " . $options['dir'] . " is not one of your defined storage pool drive.\n";
    }
    echo "Please use one of the following with the --wait-for option:\n ";
    echo implode("\n ", $storage_pool_drives) . "\n";
    exit(1);
}
// --balance: queue a 'balance' task for the daemon.
if ($action == 'balance') {
    $balance_task_inserted = db_query("INSERT INTO tasks (action, share, complete) VALUES ('balance', '', 'yes')");
    if (!$balance_task_inserted) {
        gh_log(CRITICAL, "Can't insert balance task: " . db_error());
    }
    echo "A balance has been scheduled. It will start after all currently pending tasks have been completed.\n"
       . "This operation will try to even the available space on all drives included in your storage pool.\n";
    exit(0);
}
// --cancel-fsck: remove queued fsck and md5 tasks, then restart the daemon
// (when a known init script is found) so that a running fsck stops too.
if ($action == 'cancel-fsck') {
    foreach (array('fsck', 'md5') as $cancelled_action) {
        db_query("DELETE FROM tasks WHERE action = '$cancelled_action'") or gh_log(CRITICAL, "Can't delete $cancelled_action tasks: " . db_error());
    }
    echo "All scheduled fsck tasks have now been deleted.\n";
    echo "Specific files checks might have been queued for problematic files, and those (fsck_file) tasks will still be executed, once other tasks have been processed.\n";

    // Pick the restart command matching the init system files found on this machine
    $restart_command = NULL;
    if (is_file('/etc/init.d/greyhole')) {
        $restart_command = "/etc/init.d/greyhole condrestart";
    } else if (is_file('/etc/init/greyhole.conf')) {
        $restart_command = "/sbin/restart greyhole";
    }

    if ($restart_command !== NULL) {
        exec($restart_command);
    } else {
        echo "You should now restart the Greyhole daemon to stop any ongoing fsck.\n";
    }
    exit(0);
}
// --fsck: queue a 'fsck' task, for one directory (--dir) or for all shares,
// with the requested options stored pipe-separated in additional_info.
if ($action == 'fsck') {
    $pos = array_search('fsck', $argv);

    $full_path = '';
    if (isset($options['dir'])) {
        $full_path = $options['dir'];
        if (!is_dir($full_path)) {
            echo "$full_path is not a directory. Exiting.\n";
            exit(1);
        }
    }

    // Command line option => flag stored with the task, in the order they are written.
    $flag_for_option = array(
        'email-report' => 'email',
        'dont-walk-metadata-store' => 'metastore',
        'if-conf-changed' => 'if-conf-changed',
        'disk-usage-report' => 'du',
        'find-orphaned-files' => 'orphaned',
        'checksums' => 'checksums',
        'delete-orphaned-metadata' => 'del-orphaned-metadata',
    );
    $fsck_options = array();
    foreach ($flag_for_option as $option_name => $flag) {
        $wanted = isset($options[$option_name]);
        if ($option_name == 'dont-walk-metadata-store') {
            // This one is a negative option: the flag is added unless the option was given
            $wanted = !$wanted;
        }
        if ($wanted) {
            $fsck_options[] = $flag;
        }
    }

    if (empty($fsck_options)) {
        $additional_info = "NULL";
    } else {
        $additional_info = "'" . implode('|', $fsck_options) . "'";
    }
    $query = sprintf("INSERT INTO tasks (action, share, additional_info, complete) VALUES ('fsck', '%s', %s, 'yes')",
        db_escape_string($full_path),
        $additional_info
    );
    db_query($query) or gh_log(CRITICAL, "Can't insert fsck task: " . db_error());

    if ($full_path == '') {
        $full_path = 'all shares';
    }
    echo "fsck of $full_path has been scheduled. It will start after all currently pending tasks have been completed.\n";
    if (isset($options['checksums'])) {
        echo "Any mismatch in checksums will be logged in both " . $greyhole_log_file . " and " . FSCKLogFile::PATH . "/fsck_checksums.log\n";
    }
    exit(0);
}
// --md5-worker: run the checksum worker on the given director(y|ies).
if ($action == 'md5-worker') {
    // --dir may have been specified once (scalar) or multiple times (array)
    $dirs = is_array($options['dir']) ? $options['dir'] : array($options['dir']);
    md5_worker_thread($dirs);
    exit(0);
}
// No action matched above: run as the daemon.

// Refuse to start when more than one daemon process (this one included) is found.
$num_daemon_processes = exec('ps ax | grep ".*/php .*/greyhole --daemon" | grep -v grep | wc -l');
if ($num_daemon_processes > 1) {
    $running_pid = trim(file_get_contents('/var/run/greyhole.pid'));
    die("Found an already running Greyhole daemon with PID " . $running_pid . ".\nCan't start multiple Greyhole daemons.\nQuitting.\n");
}

gh_log(INFO, "Greyhole (version 0.9.16) daemon started.");

// One-time start-up work
repair_tables();
terminology_conversion();
set_metastore_backup();
set_uniq_id();
Settings::backup();
parse_samba_spool();
simplify_tasks();

// Main loop: read the spool, check the drives, then work on one task.
for (;;) {
    parse_samba_spool();
    $action = 'check_pool';
    check_storage_pool_drives();
    execute_next_task();
}
// Migrates folders and settings that still use the old names
// (graveyard, attic) to the current ones (metastore, trash).
function terminology_conversion() {
    // old folder name => new folder name
    $renamed_folders = array(
        '.gh_graveyard' => '.gh_metastore',
        '.gh_graveyard_backup' => '.gh_metastore_backup',
        '.gh_attic' => '.gh_trash',
    );
    foreach ($renamed_folders as $old_name => $new_name) {
        convert_folders($old_name, $new_name);
    }
    convert_database();
}
// Renames the graveyard_backup_directory setting to metastore_backup_directory,
// and updates its value when it still refers to a .gh_graveyard_backup folder.
function convert_database() {
    Settings::rename('graveyard_backup_directory', 'metastore_backup_directory');

    $old_style_setting = Settings::get('metastore_backup_directory', '%graveyard%');
    if (!$old_style_setting) {
        return;
    }

    $converted_value = str_replace('/.gh_graveyard_backup', '/.gh_metastore_backup', $old_style_setting->value);
    Settings::set('metastore_backup_directory', $converted_value);
}
// Renames the folder $old to $new at the root of every storage pool drive where $old exists.
function convert_folders($old,$new) {
    global $storage_pool_drives;

    foreach ($storage_pool_drives as $sp_drive) {
        $source = clean_dir("$sp_drive/$old");
        $destination = clean_dir("$sp_drive/$new");
        if (!file_exists($source)) {
            continue;
        }
        gh_log(INFO, "Moving $source to $destination...");
        gh_rename($source, $destination);
    }
}
// Returns the next task to work on (lowest ID among tasks marked 'yes' or 'thawed'), or FALSE if there is none.
//   $result_new_tasks: (out) result set of the query (up to 20 tasks); the returned task was already fetched from it.
//   $incl_md5:         when FALSE, md5 tasks are ignored.
//   $update_idle:      when TRUE and no task is found, all 'idle' tasks are marked 'yes', and the search is done once more.
function get_next_task(&$result_new_tasks, $incl_md5=FALSE, $update_idle=TRUE) {
    $md5_filter = $incl_md5 ? "" : " AND action != 'md5'";
    $query = "SELECT id, action, share, full_path, additional_info, complete FROM tasks WHERE complete IN ('yes', 'thawed')" . $md5_filter . " ORDER BY id ASC LIMIT 20";
    $result_new_tasks = db_query($query) or gh_log(CRITICAL, "Can't query tasks: " . db_error());

    $task = db_fetch_object($result_new_tasks);
    if ($task !== FALSE || !$update_idle) {
        return $task;
    }

    // No more complete = yes|thawed; let's look for complete = 'idle' tasks.
    $wake_idle_query = "UPDATE tasks SET complete = 'yes' WHERE complete = 'idle'";
    db_query($wake_idle_query) or gh_log(CRITICAL, "Can't update idle tasks to complete tasks: " . db_error());
    return get_next_task($result_new_tasks, $incl_md5, FALSE);
}
// Executes one iteration of the daemon's work:
//  - picks the next task: the global $next_task if set, else the next row of the
//    cached result set ($result_new_tasks), else a fresh query (get_next_task);
//  - when there is nothing to do: does the idle housekeeping (table repair, md5 workers
//    for incomplete checksums, unsent fsck reports), sleeps, and returns;
//  - postpones tasks found in frozen directories, unless they were thawed;
//  - otherwise dispatches on the task's action, then archives the task.
// Works on globals only; returns nothing.
function execute_next_task() {
    global $log_level, $fsck_report, $storage_pool_drives, $shares_options, $email_to, $sleep_before_task, $action, $frozen_directories, $next_task, $current_task_id, $locked_files, $result_new_tasks, $fix_symlinks_scanned_dirs;

    if (isset($next_task)) {
        $task = $next_task;
        unset($GLOBALS['next_task']);
    } else {
        $task = FALSE;
        if (!empty($result_new_tasks)) {
            // Continue with the result set of the last query, until it is exhausted
            $task = db_fetch_object($result_new_tasks);
            if ($task === FALSE) {
                db_free_result($result_new_tasks);
                $result_new_tasks = null;
            }
        }
        if ($task === FALSE) {
            $task = get_next_task($result_new_tasks, TRUE);
            if ($task === FALSE) {
                $action = 'sleep';
                gh_log(DEBUG, "Nothing to do... Sleeping.");

                repair_tables();

                $query = "SELECT * from tasks WHERE action = 'md5' AND complete = 'no' LIMIT 1";
                $result = db_query($query) or gh_log(CRITICAL, "Can't query tasks for incomplete md5: " . db_error());
                if ($row = db_fetch_object($result)) {
                    $num_worker_threads = (int) trim(exec("ps x | grep '/usr/bin/greyhole --md5-worker' | grep -v grep | wc -l"));
                    if ($num_worker_threads == 0) {
                        gh_log(DEBUG, "Will spawn new worker threads to work on incomplete checksums calculations.");
                        foreach ($storage_pool_drives as $sp_drive) {
                            spawn_thread('md5-worker', array($sp_drive));
                        }
                    }
                }
                db_free_result($result);

                // Email any unsent fsck reports found in /usr/share/greyhole/
                foreach (array('fsck_checksums.log', 'fsck_files.log') as $log_file) {
                    $log = new FSCKLogFile($log_file);
                    $log->emailAsRequired();
                }

                // 10s in DEBUG, 1s in TEST or PERF, 10 minutes otherwise.
                // $log_level has to be compared to each constant: a bare "|| PERF" is always
                // true (PERF is a non-zero constant), which made the 600s case unreachable.
                sleep($log_level == DEBUG ? 10 : ($log_level == TEST || $log_level == PERF ? 1 : 600));
                db_free_result($result_new_tasks);
                $result_new_tasks = null;
                $locked_files = array();
                return;
            }
        }
    }
    $current_task_id = $task->id;

    # Postpone tasks in frozen directories until a --thaw command is received
    if ($task->complete != 'thawed') {
        foreach ($frozen_directories as $frozen_directory) {
            if (mb_strpos("$task->share/$task->full_path", $frozen_directory) === 0) {
                $action = $task->action;
                gh_log(DEBUG, "Now working on task ID $task->id: $task->action " . clean_dir("$task->share/$task->full_path") . ($task->action == 'rename' ? " -> $task->share/$task->additional_info" : ''));
                gh_log(DEBUG, " This directory is frozen. Will postpone this task until it is thawed.");
                postpone_task($task->id, 'frozen');
                archive_task($task->id);
                return;
            }
        }
    }

    // Tasks listed in $sleep_before_task get a pause before being worked on
    if (($key = array_search($task->id, $sleep_before_task)) !== FALSE) {
        $action = 'sleep';
        gh_log(DEBUG, "Only locked files operations pending... Sleeping.");
        sleep($log_level == DEBUG ? 10 : ($log_level == TEST ? 1 : 600));
        $sleep_before_task = array();
        $locked_files = array();
    }

    $action = $task->action;
    gh_log(INFO, "Now working on task ID $task->id: $task->action " . clean_dir("$task->share/$task->full_path") . ($task->action == 'rename' ? " -> $task->share/$task->additional_info" : ''));

    switch ($task->action) {
        case 'balance':
            gh_log(INFO, "Starting available space balancing");
            gh_balance();
            gh_log(INFO, "Available space balancing completed.");
            break;
        case 'fsck_file':
            set_fsck_options($task);
            $task->full_path = get_share_landing_zone($task->share) . '/' . $task->full_path;
            $file_type = @filetype($task->full_path);
            list($path, $filename) = explode_full_path($task->full_path);
            FSCKLogFile::loadFSCKReport('Missing files'); // Create or load the fsck_report from disk
            gh_fsck_file($path, $filename, $file_type, 'metastore', $task->share);
            if (task_has_option($task, 'email')) {
                // Save the report to disk to be able to email it when we're done with all fsck_file tasks
                FSCKLogFile::saveFSCKReport();
            }
            break;
        case 'md5':
            gh_check_md5($task);
            break;
        case 'fsck':
            $new_conf_md5 = get_conf_md5();
            if (task_has_option($task, 'if-conf-changed')) {
                // Let's check if the conf file changed since the last fsck

                // Last value
                $last_md5 = '';
                $setting = Settings::get('last_fsck_conf_md5');
                if ($setting) {
                    $last_md5 = $setting->value;
                }

                // New value
                if ($new_conf_md5 == $last_md5) {
                    gh_log(INFO, "Skipping fsck; --if-conf-changed was specified, and the configuration file didn't change since the last fsck.");
                    break;
                }
            }

            // For a fsck of a specific directory, restrict the clean-up queries below to the share
            // whose landing zone is the longest prefix of that directory. (For fsck tasks, the
            // 'share' column holds the directory to check, or '' for all shares.)
            $where_clause = "";
            if ($task->share == '') {
                $fsck_what_dir = 'All shares';
            } else {
                $fsck_what_dir = $task->share;
                $max_lz_length = 0;
                foreach ($shares_options as $share_name => $share_options) {
                    if (strpos($fsck_what_dir, $share_options['landing_zone']) === 0 && strlen($share_options['landing_zone']) > $max_lz_length) {
                        $max_lz_length = strlen($share_options['landing_zone']);
                        $where_clause = sprintf("AND share = '%s'", $share_name);
                    }
                }
            }

            // First, let's remove all md5 tasks that would be duplicates of the ones we'll create during this fsck
            db_query("DELETE FROM tasks WHERE action = 'md5' $where_clause") or gh_log(CRITICAL, "Can't delete deprecated md5 tasks: " . db_error());

            // Second, let's make sure all fsck_file tasks marked idle get executed.
            $query = "UPDATE tasks SET complete = 'yes' WHERE action = 'fsck_file' AND complete = 'idle' $where_clause";
            db_query($query) or gh_log(CRITICAL, "Can't update fsck_file/idle tasks to fsck_file/complete: " . db_error());
            $result = db_query("SELECT COUNT(*) AS num_updated_rows FROM tasks WHERE action = 'fsck_file' AND complete = 'yes' $where_clause") or gh_log(CRITICAL, "Can't find number of updated tasks for fsck_file/complete tasks: " . db_error());
            $row = db_fetch_object($result);
            if ($row->num_updated_rows > 0) {
                // Updated some fsck_file to complete; let's just return here, to allow them to be executed first.
                // (Returning skips the archive_task() call at the end, so this fsck task stays queued.)
                gh_log(INFO, "Will execute all ($row->num_updated_rows) pending fsck_file operations for $fsck_what_dir before running this fsck (task ID $task->id).");
                return;
            }

            gh_log(INFO, "Starting fsck for $fsck_what_dir");
            initialize_fsck_report($fsck_what_dir);
            clearstatcache();

            if (task_has_option($task, 'checksums')) {
                // Spawn md5 worker threads; those will calculate files MD5, and save the result in the DB.
                // The Greyhole daemon will then read those, and check them against each other to make sure all is fine.
                $checksums_thread_ids = array();
                foreach ($storage_pool_drives as $sp_drive) {
                    $checksums_thread_ids[] = spawn_thread('md5-worker', array($sp_drive));
                }
                gh_log(DEBUG, "Spawned " . count($checksums_thread_ids) . " worker threads to calculate MD5 checksums. Will now wait for results, and check them as they come in.");
            }

            set_fsck_options($task);

            if ($task->share == '') {
                // All shares: landing zones first, then (optionally) metastores and orphaned files
                foreach ($shares_options as $share_name => $share_options) {
                    gh_fsck($share_options['landing_zone'], $share_name);
                }
                if (task_has_option($task, 'metastore')) {
                    foreach (get_metastores() as $metastore) {
                        foreach ($shares_options as $share_name => $share_options) {
                            gh_fsck_metastore($metastore, "/$share_name", $share_name);
                        }
                    }
                }
                if (task_has_option($task, 'orphaned')) {
                    foreach ($storage_pool_drives as $sp_drive) {
                        foreach ($shares_options as $share_name => $share_options) {
                            gh_fsck("$sp_drive/$share_name", $share_name, $sp_drive);
                        }
                    }
                }
            } else {
                $share_options = get_share_options_from_full_path($task->share);
                $storage_volume = FALSE;
                if ($share_options === FALSE) {
                    //Since share_options is FALSE we didn't get a share path, maybe we got a storage volume path, let's check
                    $storage_volume = get_storage_volume_from_path($task->share);
                    $share_options = get_share_options_from_storage_volume($task->share, $storage_volume);
                }
                if ($share_options !== FALSE) {
                    $share = $share_options['name'];
                    $metastore = get_metastore_from_path($task->share);
                    if ($metastore === FALSE) {
                        //Only kick off an fsck on the passed dir if it's not a metastore
                        gh_fsck($task->share, $share, $storage_volume);
                    }
                    if (task_has_option($task, 'metastore') !== FALSE) {
                        if ($metastore === FALSE) {
                            //This isn't a metastore dir so we'll check the metastore of this path on all volumes
                            if ($storage_volume !== FALSE) {
                                $subdir = str_replace($storage_volume, '', $task->share);
                            } else {
                                $subdir = "/$share" . str_replace($share_options['landing_zone'], '', $task->share);
                            }
                            gh_log(DEBUG, "Starting metastores fsck for $subdir");
                            foreach (get_metastores() as $metastore) {
                                gh_fsck_metastore($metastore, $subdir, $share);
                            }
                        } else {
                            //This is a metastore directory, so only kick off a metastore fsck for the indicated directory (this will not fsck the corresponding metastore path on other volumes)
                            $subdir = str_replace("$metastore", '', $task->share);
                            gh_log(DEBUG, "Starting metastore fsck for $metastore/$subdir");
                            gh_fsck_metastore($metastore, $subdir, $share);
                        }
                    }
                }
            }
            gh_log(INFO, "fsck for " . ($task->share == '' ? 'All shares' : $task->share) . " completed.");

            // Remember the configuration this fsck ran with, for the next --if-conf-changed check
            Settings::set('last_fsck_conf_md5', $new_conf_md5);

            if (task_has_option($task, 'email')) {
                // Email report for fsck
                $fsck_report_mail = get_fsck_report();
                gh_log(DEBUG, "Sending fsck report to $email_to");
                mail($email_to, 'fsck of Greyhole shares on ' . exec('hostname'), $fsck_report_mail);
            }
            if (task_has_option($task, 'du')) {
                // Save disk-usage report to disk
                $fp = fopen('/usr/share/greyhole/gh-disk-usage.log', 'w');
                if ($fp) {
                    global $du;
                    foreach ($du as $path => $size) {
                        // Each line: number of '/' in the path, the path, and its size
                        $chars_count = count_chars($path, 1);
                        fwrite($fp, $chars_count[ord('/')] . " $path $size\n");
                    }
                    fwrite($fp, "# " . serialize($shares_options) . "\n");
                    fclose($fp);
                }
            }
            break;
        case 'mkdir':
            // Nothing to do for directory creations
            break;
        case 'write':
            gh_write($task->share, $task->full_path, $task->id);
            break;
        case 'rename':
            $fix_symlinks_scanned_dirs = array();
            gh_mv($task->share, $task->full_path, $task->additional_info, $task->id);
            break;
        case 'unlink':
            gh_unlink($task->share, $task->full_path, $task->id);
            break;
        case 'rmdir':
            gh_rmdir($task->share, $task->full_path);
            break;
    }

    if ($task->action != 'write' && $task->action != 'rename') {
        $sleep_before_task = array();
    }

    archive_task($task->id);
}
// Moves the specified task from the tasks table to the tasks_completed table.
function archive_task($task_id) {
    global $db_options;

    $copy_query = sprintf("INSERT INTO tasks_completed SELECT * FROM tasks WHERE id = %d", $task_id);
    if (!db_query($copy_query)) {
        // Let's try a second time... This is kinda important!
        db_connect() or gh_log(CRITICAL, "Can't connect to $db_options->engine database.");
        db_query($copy_query) or gh_log(CRITICAL, "Can't insert in tasks_completed: " . db_error());
    }

    $delete_query = sprintf("DELETE FROM tasks WHERE id = %d", $task_id);
    db_query($delete_query) or gh_log(CRITICAL, "Can't delete from tasks: " . db_error());
}
// Handles the deletion of a directory from a share: removes the matching (empty) directories
// from the storage pool drives; for a trash share, from the drives' trash folders instead.
// Does nothing when the share has no landing zone.
function gh_rmdir($share, $full_path) {
    global $storage_pool_drives, $trash_share_names;

    $landing_zone = get_share_landing_zone($share);
    if (!$landing_zone) {
        return;
    }

    gh_log(INFO, "Directory deleted: $landing_zone/$full_path");

    $is_trash_share = (array_search($share, $trash_share_names) !== FALSE);

    foreach ($storage_pool_drives as $sp_drive) {
        if ($is_trash_share) {
            // Remove that directory from all trashs
            $trashed_dir = "$sp_drive/.gh_trash/$full_path";
            if (@rmdir($trashed_dir)) {
                gh_log(DEBUG, " Removed copy from trash at $trashed_dir");
            }
            continue;
        }

        // rmdir() only succeeds on empty directories; failures are ignored on purpose (@)
        $copy_dir = "$sp_drive/$share/$full_path";
        if (@rmdir("$copy_dir/")) {
            gh_log(DEBUG, " Removed copy at $copy_dir");
        }

        $metadata_dir = "$sp_drive/.gh_metastore/$share/$full_path";
        if (@rmdir("$metadata_dir/")) {
            gh_log(DEBUG, " Removed metadata files directory $metadata_dir");
        }
    }
}
/**
 * Handles an 'unlink' task: a file was deleted from a share.
 *
 * For a share listed in $trash_share_names, the deleted entry was a symlink
 * in the trash share; the first trashed copy that no remaining symlink of
 * that share directory points to is deleted.
 *
 * For any other share, every file copy referenced by the metafiles is sent
 * to gh_recycle() and the metafiles are removed, unless a (non-directory)
 * file exists again at the same location in the landing zone.
 *
 * @param string $share     Share name.
 * @param string $full_path File path, relative to the share.
 * @param int    $task_id   ID of the task being processed (not used here).
 */
function gh_unlink($share, $full_path, $task_id) {
    global $trash_share_names, $trash_share, $storage_pool_drives;

    $landing_zone = get_share_landing_zone($share);
    if (!$landing_zone) {
        return;
    }

    gh_log(INFO, "File deleted: $landing_zone/$full_path");

    if (array_search($share, $trash_share_names) !== FALSE) {
        // Will delete the file in the trash which has no corresponding symlink in the Greyhole Trash share.
        // That symlink is what was deleted from that share to create the task we're currently working on.
        $full_path = preg_replace('/ copy [0-9]+$/', '', $full_path);
        gh_log(DEBUG, " Looking for corresponding file in trash to delete...");

        foreach ($storage_pool_drives as $sp_drive) {
            $trashed_copy = "$sp_drive/.gh_trash/$full_path";
            if (!file_exists($trashed_copy)) {
                continue;
            }

            $delete = TRUE;
            list($path, $filename) = explode_full_path("{$trash_share['landing_zone']}/$full_path");
            $dh = opendir($path);
            if ($dh) {
                while (($file = readdir($dh)) !== FALSE) {
                    if ($file == '.' || $file == '..') {
                        continue;
                    }
                    if (is_link("$path/$file") && readlink("$path/$file") == $trashed_copy) {
                        // A symlink still points to this copy; keep it. No need to scan further.
                        $delete = FALSE;
                        break;
                    }
                }
                // Release the directory handle; one is opened per trashed copy found.
                closedir($dh);
            }

            if ($delete) {
                gh_log(DEBUG, " Deleting corresponding copy $trashed_copy");
                unlink($trashed_copy);
                break;
            }
        }
        return;
    }

    if (gh_file_exists("$landing_zone/$full_path") && !is_dir("$landing_zone/$full_path")) {
        gh_log(DEBUG, " File still exists in landing zone; a new file replaced the one deleted here. Skipping.");
        return;
    }

    list($path, $filename) = explode_full_path($full_path);

    foreach (get_metafiles($share, $path, $filename, TRUE) as $existing_metafiles) {
        foreach ($existing_metafiles as $metafile) {
            gh_recycle($metafile->path);
        }
    }
    remove_metafiles($share, $path, $filename);
}
/**
 * Tells whether $share/$full_path exists as a directory in at least one of
 * the metadata stores returned by get_metastores().
 *
 * @param string $share     Share name.
 * @param string $full_path Path, relative to the share.
 * @return bool TRUE when a matching directory was found, FALSE otherwise.
 */
function is_a_metastore_dir($share, $full_path) {
    $found = FALSE;
    foreach (get_metastores() as $metastore) {
        if (is_dir("$metastore/$share/$full_path")) {
            $found = TRUE;
            break;
        }
    }
    return $found;
}
/**
 * Handles a 'rename' task: a file or a directory was renamed/moved in a share.
 *
 * Directory rename: the data directory, the metadata store directory and the
 * backup metadata store directory are renamed on every storage pool drive;
 * then each metafile found below the new location gets its path rewritten,
 * the share symlinks are updated, and the metafiles are saved again.
 *
 * File rename: every file copy listed in the metafiles is renamed, the
 * metafiles are moved to the new name and the share symlink is updated.
 * When no metafile exists for the old name, the target is handled like a
 * new file by gh_write(). If the target is locked, the task is postponed.
 *
 * The global $sleep_before_task is reset to an empty array when the function
 * runs to its end (not on the early returns).
 *
 * @param string $share            Share name.
 * @param string $full_path        Old path, relative to the share.
 * @param string $target_full_path New path, relative to the share.
 * @param int    $task_id          ID of the task being processed.
 */
function gh_mv($share, $full_path, $target_full_path, $task_id) {
    global $storage_pool_drives, $log_level, $sleep_before_task;

    $landing_zone = get_share_landing_zone($share);
    if (!$landing_zone) {
        return;
    }

    if (is_dir("$landing_zone/$target_full_path") || is_a_metastore_dir($share, $full_path)) {
        gh_log(INFO, "Directory renamed: $landing_zone/$full_path -> $landing_zone/$target_full_path");

        foreach ($storage_pool_drives as $sp_drive) {
            list($original_path, $dirname) = explode_full_path(get_share_landing_zone($share) . "/$target_full_path");

            if (is_dir("$sp_drive/$share/$full_path")) {
                # Make sure the parent directory of target_full_path exists, before we try moving something there...
                list($path, $dirname) = explode_full_path("$sp_drive/$share/$target_full_path");
                gh_mkdir($path, $original_path);

                gh_rename("$sp_drive/$share/$full_path", "$sp_drive/$share/$target_full_path");
                gh_log(DEBUG, " Directory moved: $sp_drive/$share/$full_path -> $sp_drive/$share/$target_full_path");
            }

            list($path, $dirname) = explode_full_path("$sp_drive/.gh_metastore/$share/$target_full_path");
            gh_mkdir($path, $original_path);

            $result = @gh_rename("$sp_drive/.gh_metastore/$share/$full_path", "$sp_drive/.gh_metastore/$share/$target_full_path");
            if ($result) {
                gh_log(DEBUG, " Metadata Store directory moved: $sp_drive/.gh_metastore/$share/$full_path -> $sp_drive/.gh_metastore/$share/$target_full_path");
            }

            $result = @gh_rename("$sp_drive/.gh_metastore_backup/$share/$full_path", "$sp_drive/.gh_metastore_backup/$share/$target_full_path");
            if ($result) {
                gh_log(DEBUG, " Backup Metadata Store directory moved: $sp_drive/.gh_metastore_backup/$share/$full_path -> $sp_drive/.gh_metastore_backup/$share/$target_full_path");
            }
        }

        foreach (get_metafiles($share, $target_full_path, null, FALSE, FALSE, FALSE) as $existing_metafiles) {
            gh_log(DEBUG, "Existing metadata files: " . count($existing_metafiles));

            foreach ($existing_metafiles as $file_path => $file_metafiles) {
                gh_log(DEBUG, " File metafiles: " . count($file_metafiles));

                $new_file_metafiles = array();
                $symlinked = FALSE;
                // Reset for every file: without this, a file that has no metafile would
                // re-use the symlink target computed for the previous file (or an undefined variable).
                $symlink_target = NULL;

                foreach ($file_metafiles as $key => $metafile) {
                    $old_path = $metafile->path;
                    $metafile->path = str_replace("/$share/$full_path/$file_path", "/$share/$target_full_path/$file_path", $metafile->path);
                    gh_log(DEBUG, " Changing metadata file: $old_path -> $metafile->path");
                    $new_file_metafiles[$metafile->path] = $metafile;

                    // is_linked = is the target of the existing symlink
                    if ($metafile->is_linked) {
                        $symlinked = TRUE;
                        $symlink_target = $metafile->path;
                    }
                }

                if (!$symlinked && count($file_metafiles) > 0) {
                    // None of the metafiles were is_linked; use the last one for the symlink.
                    $metafile->is_linked = TRUE;
                    $file_metafiles[$key] = $metafile;
                    $symlink_target = $metafile->path;
                }

                $new_link = "$landing_zone/$target_full_path/$file_path";
                $old_link = "$landing_zone/$full_path/$file_path";

                if ($symlink_target !== NULL && is_link($new_link) && readlink($new_link) != $symlink_target) {
                    gh_log(DEBUG, " Updating symlink at $new_link to point to $symlink_target");
                    unlink($new_link);
                    symlink($symlink_target, $new_link);
                } else if ($symlink_target !== NULL && is_link($old_link) && !file_exists(readlink($old_link))) {
                    gh_log(DEBUG, " Updating symlink at $old_link to point to $symlink_target");
                    unlink($old_link);
                    symlink($symlink_target, $old_link);
                } else {
                    fix_symlinks($landing_zone, $share, "$full_path/$file_path", "$target_full_path/$file_path");
                }

                list($path, $filename) = explode_full_path("$target_full_path/$file_path");
                save_metafiles($share, $path, $filename, $new_file_metafiles);
            }
        }
    } else {
        gh_log(INFO, "File renamed: $landing_zone/$full_path -> $landing_zone/$target_full_path");

        // Check if another process locked this file before we work on it.
        global $locked_files;
        if (isset($locked_files[clean_dir("$share/$target_full_path")]) || file_is_locked($share, $target_full_path) !== FALSE) {
            gh_log(DEBUG, " File $landing_zone/$target_full_path is locked by another process. Will wait until it's unlocked to work on it.");
            postpone_task($task_id);
            $locked_files[clean_dir("$share/$target_full_path")] = TRUE;
            return;
        }

        list($path, $filename) = explode_full_path($full_path);
        list($target_path, $target_filename) = explode_full_path($target_full_path);

        foreach (get_metafiles($share, $path, $filename, FALSE, FALSE, FALSE) as $existing_metafiles) {
            // There might be old metafiles... for example, when a delete task was skipped.
            // Let's remove the file copies if there are any leftovers; correct copies will be re-created below.
            if (file_exists("$landing_zone/$target_full_path") && (count($existing_metafiles) > 0 || !is_link("$landing_zone/$target_full_path"))) {
                foreach (get_metafiles($share, $target_path, $target_filename) as $existing_target_metafiles) {
                    if (count($existing_target_metafiles) > 0) {
                        foreach ($existing_target_metafiles as $metafile) {
                            gh_recycle($metafile->path);
                        }
                        remove_metafiles($share, $target_path, $target_filename);
                    }
                }
            }

            if (count($existing_metafiles) == 0) {
                // Any NOK metafiles that need to be removed?
                foreach (get_metafiles($share, $path, $filename, TRUE, FALSE, FALSE) as $all_existing_metafiles) {
                    if (count($all_existing_metafiles) > 0) {
                        remove_metafiles($share, $path, $filename);
                    }
                }
                // New file
                gh_write($share, $target_full_path, $task_id);
            } else {
                $symlinked = FALSE;
                foreach ($existing_metafiles as $key => $metafile) {
                    $old_path = $metafile->path;
                    $metafile->path = str_replace("/$share/$full_path", "/$share/$target_full_path", $old_path);
                    gh_log(DEBUG, " Renaming copy at $old_path to $metafile->path");

                    // Make sure the target directory exists
                    list($metafile_dir_path, $metafile_filename) = explode_full_path($metafile->path);
                    list($original_path, $dirname) = explode_full_path(get_share_landing_zone($share) . "/$target_full_path");
                    gh_mkdir($metafile_dir_path, $original_path);

                    $it_worked = gh_rename($old_path, $metafile->path);
                    if ($it_worked) {
                        // is_linked = is the target of the existing symlink
                        if ($metafile->is_linked) {
                            $symlinked = TRUE;
                            $symlink_target = $metafile->path;
                        }
                    } else {
                        gh_log(WARN, " Warning! An error occured while renaming file copy $old_path to $metafile->path.");
                    }
                    $existing_metafiles[$key] = $metafile;
                }

                if (!$symlinked && count($existing_metafiles) > 0) {
                    // None of the metafiles were is_linked; use the last one for the symlink.
                    $metafile->is_linked = TRUE;
                    $existing_metafiles[$key] = $metafile;
                    $symlink_target = $metafile->path;
                }

                remove_metafiles($share, $path, $filename);
                save_metafiles($share, $target_path, $target_filename, $existing_metafiles);

                if (is_link("$landing_zone/$target_full_path")) {
                    // New link exists...
                    if (readlink("$landing_zone/$target_full_path") != $symlink_target) {
                        // ...and needs to be updated.
                        gh_log(DEBUG, " Updating symlink at $landing_zone/$target_full_path to point to $symlink_target");
                        unlink("$landing_zone/$target_full_path");
                        symlink($symlink_target, "$landing_zone/$target_full_path");
                    }
                } else if (is_link("$landing_zone/$full_path") && !file_exists(readlink("$landing_zone/$full_path"))) {
                    gh_log(DEBUG, " Updating symlink at $landing_zone/$full_path to point to $symlink_target");
                    unlink("$landing_zone/$full_path");
                    symlink($symlink_target, "$landing_zone/$full_path");
                } else {
                    fix_symlinks($landing_zone, $share, $full_path, $target_full_path);
                }
            }
        }
    }

    $sleep_before_task = array();
}
/**
 * Scans a landing zone for broken symlinks and re-points those whose target
 * moved from $full_path to $target_full_path on a storage pool drive.
 *
 * A landing zone is scanned only once per $fix_symlinks_scanned_dirs entry:
 * once scanned, it is flagged in that global array and later calls return
 * immediately.
 *
 * @param string $landing_zone     Landing zone directory to scan.
 * @param string $share            Share name.
 * @param string $full_path        Old path, relative to the share.
 * @param string $target_full_path New path, relative to the share.
 */
function fix_symlinks($landing_zone, $share, $full_path, $target_full_path) {
    global $storage_pool_drives, $fix_symlinks_scanned_dirs;

    if (isset($fix_symlinks_scanned_dirs[$landing_zone])) {
        return;
    }

    gh_log(INFO, " Scanning $landing_zone for broken links... This can take a while!");

    // With -L, find follows symlinks; entries still reported as type 'l' are the ones it could not follow.
    $dangling_links = array();
    exec("find -L " . escapeshellarg($landing_zone) . " -type l", $dangling_links);
    gh_log(DEBUG, " Found " . count($dangling_links) . " broken links.");

    foreach ($dangling_links as $broken_link) {
        $new_target = readlink($broken_link);

        foreach ($storage_pool_drives as $sp_drive) {
            $old_prefix = clean_dir("$sp_drive/$share/$full_path/");
            $new_prefix = clean_dir("$sp_drive/$share/$target_full_path/");
            $new_target = str_replace($old_prefix, $new_prefix, $new_target);

            if ($new_target == "$sp_drive/$share/$full_path") {
                $new_target = "$sp_drive/$share/$target_full_path";
                break;
            }
        }

        if (!gh_is_file($new_target)) {
            continue;
        }

        gh_log(DEBUG, " Found a broken symlink to update: $broken_link. Old (broken) target: " . readlink($broken_link) . "; new (fixed) target: $new_target");
        unlink($broken_link);
        symlink($new_target, $broken_link);
    }

    $fix_symlinks_scanned_dirs[$landing_zone] = TRUE;
}
/**
 * file_exists() on a fresh stat cache, with optional logging when the path is missing.
 *
 * The optional message may contain the literal placeholder $real_path (also
 * accepted as {$real_path} or ${real_path}), which is replaced by the path
 * that was checked before the message is logged at INFO level.
 *
 * The placeholder is substituted with strtr() rather than eval(): messages
 * are built by callers from values such as share names, and evaluating them
 * as PHP code allowed parse errors or code execution.
 *
 * @param string      $real_path   Path to check.
 * @param string|null $log_message Message to log when the path does not exist.
 * @return bool TRUE when the path exists, FALSE otherwise.
 */
function gh_file_exists($real_path, $log_message=null) {
    clearstatcache();

    if (file_exists($real_path)) {
        return TRUE;
    }

    if ($log_message !== null && $log_message !== '') {
        // strtr() tries the longest keys first, so the braced forms win over the bare one.
        $log_message = strtr($log_message, array(
            '{$real_path}' => $real_path,
            '${real_path}' => $real_path,
            '$real_path'   => $real_path,
        ));
        gh_log(INFO, $log_message);
    }
    return FALSE;
}
/**
 * Returns the number of file copies to keep for a share.
 *
 * The configured value is raised to 1 when lower, then capped to the number
 * of storage pool drives for which is_greyhole_owned_dir() is true.
 *
 * @param string $share Share name.
 * @return int Number of copies, or -1 when the share has no 'num_copies'
 *             option (a warning is logged in that case).
 */
function get_num_copies($share) {
    global $shares_options, $storage_pool_drives;

    if (!isset($shares_options[$share]['num_copies'])) {
        global $config_file;
        // WARN is the log-level constant this file defines; WARNING does not exist.
        gh_log(WARN, "Found a task on a share ($share) that disappeared from $config_file. Skipping.");
        return -1;
    }

    $num_copies = $shares_options[$share]['num_copies'];
    if ($num_copies < 1) {
        $num_copies = 1;
    }

    $max_copies = 0;
    foreach ($storage_pool_drives as $sp_drive) {
        if (is_greyhole_owned_dir($sp_drive)) {
            $max_copies++;
        }
    }

    if ($num_copies > $max_copies) {
        $num_copies = $max_copies;
    }
    return $num_copies;
}
/**
 * Tells whether a file of a share is currently in use.
 *
 * A file is considered locked when lsof lists it as open (see
 * real_file_is_locked()), or when the tasks table still holds a 'write' task
 * with complete = 'no' for the same share and path. The whole check is
 * skipped when the global $check_for_open_files is set to FALSE.
 *
 * @param string $share     Share name.
 * @param string $full_path File path, relative to the share.
 * @return string|bool The lsof output line when the file is open, TRUE when
 *                     a matching write task exists, FALSE otherwise.
 */
function file_is_locked($share, $full_path) {
    global $check_for_open_files;
    if (isset($check_for_open_files) && $check_for_open_files === FALSE) {
        gh_log(DEBUG, " Skipping open file (lock) check.");
        return FALSE;
    }

    $landing_zone = get_share_landing_zone($share);
    if (!$landing_zone) {
        return FALSE;
    }

    // Same lsof probe as before (symlink resolved one level), shared with real_file_is_locked().
    $locked_by = real_file_is_locked("$landing_zone/$full_path");
    if ($locked_by !== FALSE) {
        return $locked_by;
    }

    $query = sprintf("SELECT * FROM tasks WHERE complete = 'no' AND action = 'write' AND share = '%s' AND full_path = '%s' LIMIT 1",
        db_escape_string($share),
        db_escape_string($full_path)
    );
    $result = db_query($query) or gh_log(CRITICAL, "Error querying tasks for locked files: " . db_error());

    // Treat both FALSE and NULL as "no row": what db_fetch_object() returns for an
    // empty result set is not visible here, and NULL must not be reported as locked.
    $row = db_fetch_object($result);
    return $row !== FALSE && $row !== NULL;
}
/**
 * Asks lsof whether a file is currently open by any process.
 *
 * When the given path is a symlink, its target (one level) is checked instead.
 *
 * @param string $real_fullpath Absolute path of the file to check.
 * @return string|bool The last line of lsof's output when it mentions the
 *                     file, FALSE otherwise.
 */
function real_file_is_locked($real_fullpath) {
    $checked_path = is_link($real_fullpath) ? readlink($real_fullpath) : $real_fullpath;

    $lsof_output = exec("lsof -M -n -P -l " . escapeshellarg($checked_path) . " 2> /dev/null");

    if (mb_strpos($lsof_output, $checked_path) === FALSE) {
        return FALSE;
    }
    return $lsof_output;
}
/**
 * Follows the chain of later rename tasks to find where a file ended up.
 *
 * Starting from $full_path, each rename task returned by
 * find_next_rename_task() is applied in turn: either the file itself was
 * renamed, or one of its parent directories was, in which case only the
 * matching leading part of the path is replaced.
 *
 * Paths are handled as plain strings (no regular expression), so names that
 * contain characters such as ( ) . + @ or $1 are replaced literally.
 *
 * @param string $share     Share name.
 * @param string $full_path Path of the file, relative to the share.
 * @param int    $task_id   Only rename tasks found after this task are considered.
 * @return string The path after all rename tasks found were applied
 *                ($full_path itself when there is none).
 */
function find_future_full_path($share, $full_path, $task_id) {
    $new_full_path = $full_path;

    while ($next_task = find_next_rename_task($share, $new_full_path, $task_id)) {
        $renamed_from = (string) $next_task->full_path;

        if ($renamed_from === $new_full_path) {
            // File was renamed
            $new_full_path = $next_task->additional_info;
        } else if ($renamed_from !== '' && strpos($new_full_path, $renamed_from) === 0) {
            // A parent directory was renamed
            $new_full_path = $next_task->additional_info . substr($new_full_path, strlen($renamed_from));
        }

        $task_id = $next_task->id;
    }

    return $new_full_path;
}
/**
 * Finds the first rename task, after a given task, that affects a path.
 *
 * The query looks for a task with action = 'rename' and complete = 'yes' in
 * the same share, whose full_path is either $full_path itself or one of its
 * parent directories, and whose id is greater than $task_id.
 *
 * @param string $share     Share name.
 * @param string $full_path Path, relative to the share.
 * @param int    $task_id   Lower (exclusive) bound for the task id.
 * @return mixed Whatever db_fetch_object() returns for the first matching row.
 */
function find_next_rename_task($share, $full_path, $task_id) {
    // The path itself, followed by each of its parent directories.
    $full_paths = array($full_path);
    list($parent_full_path, $basename) = explode_full_path($full_path);
    while (strlen($parent_full_path) > 1) {
        $full_paths[] = $parent_full_path;
        list($parent_full_path, $basename) = explode_full_path($parent_full_path);
    }

    // The id is compared unquoted, so it is formatted as an integer (%d);
    // string escaping gives no protection outside of quotes.
    $query = sprintf("SELECT * FROM tasks WHERE complete = 'yes' AND share = '%s' AND action = 'rename' AND full_path IN ('%s') AND id > %d ORDER BY id LIMIT 1",
        db_escape_string($share),
        implode("','", array_map("db_escape_string", $full_paths)),
        (int) $task_id
    );
    $result = db_query($query) or gh_log(CRITICAL, "Can't select to find next task: " . db_error());
    return db_fetch_object($result);
}
/**
 * Handles a 'write' task: a file was created or modified in a share.
 *
 * When the file is gone from the landing zone, later rename tasks are
 * followed to find its new name; if it can't be found the task is skipped
 * (and postponed when the landing zone itself is missing).
 *
 * When the share entry is a symlink, the file is an existing one that
 * changed: with $balance_modified_files on, all copies except the source
 * are recycled so they can be re-created. Otherwise the file is new:
 * leftover copies and metafiles are removed. In both cases the copies are
 * then created by gh_write_process_metafiles().
 *
 * @param string $share     Share name.
 * @param string $full_path File path, relative to the share.
 * @param int    $task_id   ID of the task being processed.
 */
function gh_write($share, $full_path, $task_id) {
    global $storage_pool_drives, $log_level, $balance_modified_files, $sleep_before_task;

    $landing_zone = get_share_landing_zone($share);
    if (!$landing_zone) {
        return;
    }

    if (!gh_file_exists("$landing_zone/$full_path", '$real_path doesn\'t exist anymore.')) {
        $new_full_path = find_future_full_path($share, $full_path, $task_id);
        if ($new_full_path != $full_path && gh_is_file("$landing_zone/$new_full_path")) {
            gh_log(DEBUG, " Found that $full_path has been renamed to $new_full_path. Will work using that instead.");
            if (is_link("$landing_zone/$new_full_path")) {
                $source_file = clean_dir(readlink("$landing_zone/$new_full_path"));
            } else {
                $source_file = clean_dir("$landing_zone/$new_full_path");
            }
        } else {
            gh_log(INFO, " Skipping.");
            if (!gh_file_exists($landing_zone, ' Share "' . $share . '" landing zone "$real_path" doesn\'t exist anymore. Will not process this task until it re-appears...')) {
                postpone_task($task_id);
            }
            return;
        }
    }

    $num_copies_required = get_num_copies($share);
    if ($num_copies_required === -1) {
        return;
    }

    list($path, $filename) = explode_full_path($full_path);

    if ((isset($new_full_path) && is_link("$landing_zone/$new_full_path")) || is_link("$landing_zone/$full_path")) {
        if (!isset($source_file)) {
            $source_file = clean_dir(readlink("$landing_zone/$full_path"));
        }
        clearstatcache();
        $filesize = gh_filesize($source_file);
        if ($log_level >= DEBUG) {
            gh_log(INFO, "File changed: $share/$full_path - " . bytes_to_human($filesize, FALSE));
        } else {
            gh_log(INFO, "File changed: $share/$full_path");
        }
        gh_log(DEBUG, " Will use source file: $source_file");

        foreach (get_metafiles($share, $path, $filename) as $existing_metafiles) {
            gh_log(DEBUG, count($existing_metafiles) . " metadata files loaded.");

            // Will keep existing copies at their location if balance_modified_files = no
            // This will allow us to use rsync instead of blindly copying the complete files.
            if ($balance_modified_files) {
                // Remove old copies (but not the one that was updated!)
                $keys_to_remove = array();
                $found_source_file = FALSE;
                foreach ($existing_metafiles as $key => $metafile) {
                    $metafile->path = clean_dir($metafile->path);
                    if ($metafile->path == $source_file) {
                        $metafile->is_linked = TRUE;
                        $metafile->state = 'OK';
                        $found_source_file = TRUE;
                    } else {
                        gh_log(DEBUG, " Will remove copy at $metafile->path");
                        $keys_to_remove[] = $metafile->path;
                    }

                    // This will make sure the correct key is used in the $existing_metafiles array...
                    // This is important because that key is what is used to check if there's already a copy on that drive.
                    unset($existing_metafiles[$key]);
                    $existing_metafiles[$metafile->path] = $metafile;
                }

                // Only change the source when there is a copy to fall back on; with no
                // metafile at all, array_shift() would return NULL and wipe out $source_file.
                if (!$found_source_file && count($keys_to_remove) > 0) {
                    // This shouldn't happen, but if we're about to remove all copies, let's make sure we keep at least one.
                    $key = array_shift($keys_to_remove);
                    $source_file = $existing_metafiles[$key]->path;
                    gh_log(DEBUG, " Change of mind... Will use source file: $source_file");
                }

                foreach ($keys_to_remove as $key) {
                    if ($existing_metafiles[$key]->path != $source_file) {
                        gh_recycle($existing_metafiles[$key]->path);
                    }
                    unset($existing_metafiles[$key]);
                }
            }

            gh_write_process_metafiles($num_copies_required,$existing_metafiles,$share,$full_path,$source_file,$filesize,$task_id);
        }
    } else {
        if (!isset($source_file)) {
            $source_file = clean_dir("$landing_zone/$full_path");
        }
        clearstatcache();
        $filesize = gh_filesize($source_file);
        if ($log_level >= DEBUG) {
            gh_log(INFO, "File created: $share/$full_path - " . bytes_to_human($filesize, FALSE));
        } else {
            gh_log(INFO, "File created: $share/$full_path");
        }

        if (is_dir($source_file)) {
            gh_log(INFO, "$share/$full_path is now a directory! Aborting.");
            return;
        }

        // There might be old metafiles... for example, when a delete task was skipped.
        // Let's remove the file copies if there are any leftovers; correct copies will be re-created in create_copies_from_metafiles()
        foreach (get_metafiles($share, $path, $filename) as $existing_metafiles) {
            gh_log(DEBUG, count($existing_metafiles) . " metafiles loaded.");
            if (count($existing_metafiles) > 0) {
                foreach ($existing_metafiles as $metafile) {
                    gh_recycle($metafile->path);
                }
                remove_metafiles($share, $path, $filename);
                $existing_metafiles = array();

                // Maybe there's other file copies, that weren't metafiles, or were NOK metafiles!
                foreach ($storage_pool_drives as $sp_drive) {
                    if (file_exists("$sp_drive/$share/$path/$filename")) {
                        gh_recycle("$sp_drive/$share/$path/$filename");
                    }
                }
            }
            gh_write_process_metafiles($num_copies_required,$existing_metafiles,$share,$full_path,$source_file,$filesize,$task_id);
        }
    }
}
/**
 * Second half of a write task: creates the metafiles for a file, then the
 * file copies they describe.
 *
 * When there is work to do (more than one copy required, or no existing
 * metafile), the file is first checked for locks; a locked file gets its
 * task postponed. The task is also postponed when no metafile could be created.
 *
 * @param int    $num_copies_required Number of copies wanted for this share.
 * @param array  $existing_metafiles  Metafiles already known for this file.
 * @param string $share               Share name.
 * @param string $full_path           File path, relative to the share.
 * @param string $source_file         Path of the file to copy from.
 * @param int    $filesize            Size of the source file, as returned by gh_filesize().
 * @param int    $task_id             ID of the task being processed.
 */
function gh_write_process_metafiles($num_copies_required,$existing_metafiles,$share,$full_path,$source_file,$filesize,$task_id){
    // $sleep_before_task must be the global: assigning a local here would leave the global untouched.
    global $locked_files, $sleep_before_task;

    $landing_zone = get_share_landing_zone($share);
    list($path, $filename) = explode_full_path($full_path);

    // Only need to check for locking if we have something to do!
    if ($num_copies_required > 1 || count($existing_metafiles) == 0) {
        // Check if another process locked this file before we work on it.
        if (isset($locked_files[clean_dir("$share/$full_path")]) || file_is_locked($share, $full_path) !== FALSE) {
            gh_log(DEBUG, " File $landing_zone/$full_path is locked by another process. Will wait until it's unlocked to work on it.");
            postpone_task($task_id);
            $locked_files[clean_dir("$share/$full_path")] = TRUE;
            return;
        }
        $sleep_before_task = array();
    }

    $metafiles = create_metafiles($share, $full_path, $num_copies_required, $filesize, $existing_metafiles);

    if (count($metafiles) == 0) {
        gh_log(WARN, " No metadata files could be created. Will wait until metadata files can be created to work on this file.");
        postpone_task($task_id);
        return;
    }

    if (!is_link("$landing_zone/$full_path")) {
        // Use the 1st metafile for the symlink; it might be on a sticky drive.
        $i = 0;
        foreach ($metafiles as $metafile) {
            $metafile->is_linked = ($i++ == 0);
        }
    }

    save_metafiles($share, $path, $filename, $metafiles);

    create_copies_from_metafiles($metafiles, $share, $full_path, $source_file);
}
/**
 * Creates the file copies described by a list of metafiles.
 *
 * For each metafile, the source file is copied (or renamed, when source and
 * target are on the same device) to a temporary name next to the target,
 * then renamed to its final name and given the permissions/ownership of the
 * share file. The metafile that is flagged is_linked gets the share symlink
 * pointed at it. Metafiles are saved after every copy.
 *
 * A copy that fails is marked 'Gone'; if the share file still exists, a new
 * write task is queued in the global $next_task (unless the current task
 * already is such a retry) and the function returns.
 *
 * @param array  $metafiles    Metafiles (objects with path, state, is_linked).
 * @param string $share        Share name.
 * @param string $full_path    File path, relative to the share.
 * @param string $source_file  Path of the file to copy from.
 * @param bool   $missing_only When TRUE, copies that exist, are 'OK' and have
 *                             the same size as the source are left alone.
 */
function create_copies_from_metafiles($metafiles, $share, $full_path, $source_file, $missing_only=FALSE) {
    $landing_zone = get_share_landing_zone($share);
    list($path, $filename) = explode_full_path($full_path);
    $source_file = clean_dir($source_file);
    $link_next = FALSE;
    $file_infos = gh_get_file_infos("$landing_zone/$full_path");

    foreach ($metafiles as $key => $metafile) {
        if (!gh_file_exists("$landing_zone/$full_path", ' $real_path doesn\'t exist anymore. Aborting.')) {
            return;
        }

        if ($metafile->path == $source_file && $metafile->state == 'OK' && gh_filesize($metafile->path) == gh_filesize($source_file)) {
            gh_log(DEBUG, " File copy at $metafile->path is already up to date.");
            continue;
        }

        if ($missing_only && gh_file_exists($metafile->path) && $metafile->state == 'OK' && gh_filesize($metafile->path) == gh_filesize($source_file)) {
            gh_log(DEBUG, " File copy at $metafile->path is already up to date.");
            continue;
        }

        // NOTE: $source_size intentionally keeps its previous value when the source is neither a link nor a file.
        if (is_link($source_file)) {
            $source_size = gh_filesize(readlink($source_file));
        } else if (gh_is_file($source_file)) {
            $source_size = gh_filesize($source_file);
        }

        if (isset($source_size)) {
            gh_log(DEBUG, " Copying " . bytes_to_human($source_size, FALSE) . " file to $metafile->path");
        } else {
            gh_log(DEBUG, " Copying file to $metafile->path");
        }

        // What remains of the copy's path once the share-relative part is removed.
        $drive_root = str_replace(clean_dir("/$share/$full_path"), '', $metafile->path);
        if (!is_greyhole_owned_dir($drive_root)) {
            gh_log(WARN, " Warning! It seems $drive_root is missing it's \".greyhole_uses_this\" file. This either means this mount is currently unmounted, or you forgot to create this file.");
            $metafile->state = 'Gone';
            $metafiles[$key] = $metafile;
            continue;
        }

        list($metafile_dir_path, $metafile_filename) = explode_full_path($metafile->path);
        list($original_path, $metafile_filename) = explode_full_path(get_share_landing_zone($share) . "/$full_path");
        if (!gh_mkdir($metafile_dir_path, $original_path)) {
            $metafile->state = 'Gone';
            $metafiles[$key] = $metafile;
            continue;
        }

        $temp_copy_path = get_temp_filename($metafile->path);
        $copy_succeeded = FALSE;
        $start_time = time();

        if (is_link($source_file)) {
            exec(get_copy_cmd(readlink($source_file), $temp_copy_path));
            $copy_succeeded = file_exists($temp_copy_path) && gh_filesize($temp_copy_path) == $source_size;
        } else if (gh_is_file($source_file)) {
            $source_dev = gh_file_deviceid($source_file);
            $target_dev = gh_file_deviceid($metafile_dir_path);
            if ($source_dev === $target_dev && $source_dev !== FALSE) {
                gh_log(DEBUG, " (using rename)");
                gh_rename($source_file, $temp_copy_path);
            } else {
                exec(get_copy_cmd($source_file, $temp_copy_path));
            }
            $copy_succeeded = file_exists($temp_copy_path) && gh_filesize($temp_copy_path) == $source_size;
        }

        if (!$copy_succeeded) {
            gh_log(WARN, " Failed file copy. Will mark this metadata file 'Gone'.");
            @unlink($temp_copy_path);

            if ($metafile->is_linked) {
                // The share symlink will have to point to the next copy that works.
                $metafile->is_linked = FALSE;
                $link_next = TRUE;
            }
            $metafile->state = 'Gone';
            gh_recycle($metafile->path);
            $metafiles[$key] = $metafile;
            save_metafiles($share, $path, $filename, array_values($metafiles));

            if (file_exists("$landing_zone/$full_path")) {
                global $current_task_id;
                if ($current_task_id === 0) {
                    gh_log(ERROR, " Failed file copy (cont). We already retried this task. Aborting.");
                    return;
                }
                gh_log(WARN, " Failed file copy (cont). Will try to re-process this write task, since the source file seems intact.");

                // Queue a new write task, to replace the now gone copy.
                global $next_task;
                $next_task = (object) array(
                    'id' => 0,
                    'action' => 'write',
                    'share' => $share,
                    'full_path' => clean_dir($full_path),
                    'complete' => 'yes'
                );
                return;
            }
            continue;
        }

        if (time() - $start_time > 0) {
            $speed = number_format($source_size/1024/1024 / (time() - $start_time), 1);
            gh_log(DEBUG, " Copy created at $speed MBps.");
        }
        gh_rename($temp_copy_path, $metafile->path);
        gh_chperm($metafile->path, $file_infos);

        if ($link_next && !$metafile->is_linked) {
            $metafile->is_linked = TRUE;
            $metafiles[$key] = $metafile;
        }
        $link_next = FALSE;

        if ($metafile->is_linked) {
            gh_log(DEBUG, " Creating symlink in share pointing to the above file copy.");
            // The symlink is created under a temporary name, then renamed over the share file.
            $temp_link = "$landing_zone/$path/.gh_$filename";
            symlink($metafile->path, $temp_link);
            if (!file_exists("$landing_zone/$full_path") || unlink("$landing_zone/$full_path")) {
                gh_rename($temp_link, "$landing_zone/$path/$filename");
            } else {
                unlink($temp_link);
            }
        }

        if (gh_file_exists($metafile->path, ' Copy at $real_path doesn\'t exist. Will not mark it OK!')) {
            $metafile->state = 'OK';
            $metafiles[$key] = $metafile;
        }
        save_metafiles($share, $path, $filename, array_values($metafiles));
    }
}
/**
 * Applies permissions, owner and group to a file.
 *
 * @param string $real_file_path Path of the file to change.
 * @param object $file_infos     Object with fileperms, fileowner and filegroup properties.
 */
function gh_chperm($real_file_path, $file_infos) {
    $mode = $file_infos->fileperms;
    $owner = $file_infos->fileowner;
    $group = $file_infos->filegroup;

    chmod($real_file_path, $mode);
    chown($real_file_path, $owner);
    chgrp($real_file_path, $group);
}
/**
 * Creates a directory and all its missing parents, giving each level that is
 * walked the permissions (at creation), owner and group of a reference directory.
 *
 * When a regular file is in the way of a directory to create, that file is
 * renamed to "<name> (file copy)" and the creation is retried.
 *
 * @param string        $directory                       Directory to create.
 * @param string|object $original_directory_or_dir_infos Path of the reference directory, or an
 *                                                       object with fileperms, fileowner and filegroup.
 * @return bool FALSE when a directory could not be created; TRUE otherwise
 *              (including when $directory already existed).
 */
function gh_mkdir($directory, $original_directory_or_dir_infos) {
    if (is_dir($directory)) {
        return TRUE;
    }

    $dir_infos = is_string($original_directory_or_dir_infos)
        ? gh_get_file_infos($original_directory_or_dir_infos)
        : $original_directory_or_dir_infos;

    // Need to mkdir & chown/chgrp all dirs that don't exists, up to the full path ($directory)
    $dir_parts = explode('/', $directory);
    $next = 0;

    // Walk down the path for as long as the directories already exist.
    $current_dir = clean_dir('/' . $dir_parts[$next++]);
    while (is_dir($current_dir) && $next < count($dir_parts)) {
        $current_dir = clean_dir($current_dir . '/' . $dir_parts[$next++]);
    }

    // Create what is missing, one level at a time.
    while ($next <= count($dir_parts)) {
        if (!is_dir($current_dir) && !@mkdir($current_dir, $dir_infos->fileperms)) {
            if (gh_is_file($current_dir)) {
                gh_rename($current_dir, "$current_dir (file copy)");
            }
            if (!mkdir($current_dir, $dir_infos->fileperms)) {
                gh_log(WARN, " Failed to create directory $current_dir");
                return FALSE;
            }
        }

        if (!chown($current_dir, $dir_infos->fileowner)) {
            gh_log(WARN, " Failed to chown directory '$current_dir'");
        }
        if (!chgrp($current_dir, $dir_infos->filegroup)) {
            gh_log(WARN, " Failed to chgrp directory '$current_dir'");
        }

        if (!isset($dir_parts[$next])) {
            break;
        }
        $current_dir = clean_dir($current_dir . '/' . $dir_parts[$next++]);
    }

    return TRUE;
}
/**
 * Builds the temporary (hidden) file name used while a file copy is being created.
 *
 * The result is "<dir>/.<filename>.<first 5 chars of md5(filename)>".
 *
 * @param string $full_path Final path of the file copy.
 * @return string Path of the temporary file, in the same directory.
 */
function get_temp_filename($full_path) {
    list($directory, $basename) = explode_full_path($full_path);
    $suffix = mb_substr(md5($basename), 0, 5);
    return $directory . '/.' . $basename . '.' . $suffix;
}
/**
 * Tells whether a path looks like a temporary file created with get_temp_filename().
 *
 * Such a file is named ".<name>.<5 hex chars>", where the hex characters are
 * the first 5 characters of md5(<name>).
 *
 * @param string $full_path Path of the file to check.
 * @return bool TRUE when the file name matches that pattern, FALSE otherwise.
 */
function is_temp_file($full_path) {
    list($path, $filename) = explode_full_path($full_path);

    if (!preg_match("/^\.(.+)\.([0-9a-f]{5})$/", $filename, $regs)) {
        return FALSE;
    }

    $md5_stem = mb_substr(md5($regs[1]), 0, 5);
    // Strict comparison: with ==, two different numeric-looking strings
    // (like '0e123' and '0e456') would be compared as numbers and found equal.
    return ($md5_stem === $regs[2]);
}
/**
 * Builds the list of metafiles (one per file copy) for a file.
 *
 * Existing metafiles are kept and re-keyed by their path: one whose copy no
 * longer exists becomes 'Pending', one whose drive fails
 * is_greyhole_owned_dir() becomes 'Gone'. New 'Pending' metafiles are then
 * added, on drives returned by order_target_drives(), until enough copies
 * are planned. Drives that already hold a copy are skipped. If no metafile
 * is flagged is_linked, the first one of the list gets the flag.
 *
 * @param string $share               Share name.
 * @param string $full_path           File path, relative to the share.
 * @param int    $num_copies_required Number of copies wanted.
 * @param int    $filesize            File size, in bytes (divided by 1024 before it is
 *                                    passed to order_target_drives()).
 * @param array  $metafiles           Existing metafiles, if any.
 * @return array Metafiles, indexed by the path of the file copy.
 */
function create_metafiles($share, $full_path, $num_copies_required, $filesize, $metafiles=array()) {
    $found_link_metafile = FALSE;

    list($path, $filename) = explode_full_path($full_path);

    $num_ok = count($metafiles);
    foreach ($metafiles as $key => $metafile) {
        if (!file_exists($metafile->path)) {
            // Re-use paths to old file copies that are now gone.
            // This will allow us to use a new drive that has been installed where an old drive was previously.
            $metafile->state = 'Pending';
        }

        $root_path = str_replace(clean_dir("/$share/$full_path"), '', $metafile->path);
        if (!is_greyhole_owned_dir($root_path)) {
            $metafile->state = 'Gone';
        }

        if ($metafile->state != 'OK' && $metafile->state != 'Pending') {
            $num_ok--;
        }

        // Strict comparison, so that numeric keys (0, 1, ...) are always replaced by the path.
        if ((string) $key !== $metafile->path) {
            unset($metafiles[$key]);
            $key = $metafile->path;
        }

        if ($metafile->is_linked) {
            $found_link_metafile = TRUE;
        }
        $metafiles[$key] = $metafile;
    }

    // Select drives that have enough free space for this file
    $local_target_drives = array();
    if ($num_ok < $num_copies_required) {
        $local_target_drives = order_target_drives($filesize/1024, FALSE, $share, $path, ' ');
    }

    while ($num_ok < $num_copies_required && count($local_target_drives) > 0) {
        $sp_drive = array_shift($local_target_drives);
        $clean_target_full_path = clean_dir("$sp_drive/$share/$full_path");

        // Don't use drives that already have a copy
        if (isset($metafiles[$clean_target_full_path])) {
            continue;
        }
        foreach ($metafiles as $metafile) {
            if ($clean_target_full_path == clean_dir($metafile->path)) {
                // Skip this drive: a plain 'continue' would only skip to the next metafile.
                continue 2;
            }
        }

        // Prepend new target drives, to make sure sticky directories will be used first
        $metafiles = array_reverse($metafiles);
        $metafiles[$clean_target_full_path] = (object) array('path' => $clean_target_full_path, 'is_linked' => FALSE, 'state' => 'Pending');
        $metafiles = array_reverse($metafiles);
        $num_ok++;
    }

    if (!$found_link_metafile) {
        // No copy is the symlink target yet; flag the first one.
        foreach ($metafiles as $metafile) {
            $metafile->is_linked = TRUE;
            break;
        }
    }

    return $metafiles;
}
/**
 * Returns the path of the first metadata file found for a file of a share.
 *
 * @param string $share    Share name.
 * @param string $path     Directory of the file, relative to the share.
 * @param string $filename Name of the file.
 * @return string|bool Path of the metadata file, or FALSE when none was found.
 */
function get_metafile_data_filename($share, $path, $filename) {
    $candidates = get_metafile_data_filenames($share, $path, $filename, 1);
    return (count($candidates) > 0) ? $candidates[0] : FALSE;
}
/**
 * Lists the metadata files that exist for a share file, looking in every metastore.
 *
 * @param string $share    Share name.
 * @param string $path     Directory of the file, relative to the share.
 * @param string $filename File name.
 * @param int    $how_many Stop looking once this many files have been found.
 * @return array Paths of the existing metadata files, in metastore order.
 */
function get_metafile_data_filenames($share, $path, $filename, $how_many=9999) {
    $found = array();
    foreach (get_metastores() as $metastore) {
        $candidate = clean_dir("$metastore/$share/$path/$filename");
        if (!is_file($candidate)) {
            continue;
        }
        $found[] = $candidate;
        if (count($found) == $how_many) {
            // Caller has all the matches it asked for.
            break;
        }
    }
    return $found;
}
/**
 * Loads metafiles either for one file, or lazily for a whole directory.
 *
 * With a $filename, returns a one-element array wrapping the result of get_metafiles_for_file().
 * Without one, returns a metafile_iterator built from the same options.
 *
 * @param string      $share              Share name.
 * @param string      $path               Path relative to the share.
 * @param string|null $filename           File name, or null to get an iterator.
 * @param bool        $load_nok_metafiles Passed through unchanged.
 * @param bool        $quiet              Passed through unchanged.
 * @param bool        $check_symlink      Passed through unchanged.
 * @return array|metafile_iterator
 */
function get_metafiles($share, $path, $filename=null, $load_nok_metafiles=FALSE, $quiet=FALSE, $check_symlink=TRUE) {
    if ($filename !== null) {
        $for_file = get_metafiles_for_file($share, $path, $filename, $load_nok_metafiles, $quiet, $check_symlink);
        return array($for_file);
    }
    return new metafile_iterator($share, $path, $load_nok_metafiles, $quiet, $check_symlink);
}
/**
 * Loads the metafiles (one entry per file copy) recorded for a single share file.
 *
 * Along the way, this function repairs the stored metadata:
 *  - when $check_symlink is set and the share file is a symlink, the 'is_linked' flag of
 *    'OK' entries is corrected to match the symlink target;
 *  - entries whose path is not on a known storage volume are marked 'Gone'.
 * If anything was repaired, the full metafile list is saved back once, after both passes.
 *
 * @param string      $share              Share name.
 * @param string      $path               Directory of the file, relative to the share.
 * @param string|null $filename           File name.
 * @param bool        $load_nok_metafiles When TRUE, entries in any state are returned; otherwise only 'OK' ones.
 * @param bool        $quiet              When TRUE, the DEBUG log lines are skipped.
 * @param bool        $check_symlink      When TRUE, 'is_linked' flags are checked against the landing zone symlink.
 * @return array Metafile objects on valid storage volumes, keyed as in the stored metadata.
 */
function get_metafiles_for_file($share, $path, $filename=null, $load_nok_metafiles=FALSE, $quiet=FALSE, $check_symlink=TRUE) {
    if (!$quiet) {
        gh_log(DEBUG, "Loading metafiles for " . clean_dir($share . (!empty($path) ? "/$path" : "") . "/$filename") . ' ...');
    }
    $metafiles_data_file = get_metafile_data_filename($share, $path, $filename);
    clearstatcache();
    $metafiles = array();
    if (file_exists($metafiles_data_file)) {
        $t = file_get_contents($metafiles_data_file);
        $metafiles = unserialize($t);
        if (!is_array($metafiles)) {
            // Unreadable or corrupt metadata file: unserialize() returned FALSE (or something unexpected).
            // Treat it as "no metadata" instead of feeding a non-array to the loops below.
            gh_log(WARN, "Couldn't read metadata from '$metafiles_data_file'; will ignore this metadata file.");
            $metafiles = array();
        }
    }

    // Set when an entry was modified below; the metadata is then saved once, with its final content.
    $needs_save = FALSE;

    if ($check_symlink) {
        // Fix wrong 'is_linked' flags
        $share_file = get_share_landing_zone($share) . "/$path/$filename";
        $share_file_link_to = FALSE;
        if (is_link($share_file)) {
            $share_file_link_to = readlink($share_file);
        }
        foreach ($metafiles as $key => $metafile) {
            if ($metafile->state != 'OK' || $share_file_link_to === FALSE) {
                continue;
            }
            if ($metafile->is_linked && $metafile->path != $share_file_link_to) {
                if (!$quiet) {
                    gh_log(DEBUG, ' Changing is_linked to FALSE for ' . $metafile->path);
                }
                $metafile->is_linked = FALSE;
                $metafiles[$key] = $metafile;
                $needs_save = TRUE;
            } else if (!$metafile->is_linked && $metafile->path == $share_file_link_to) {
                if (!$quiet) {
                    gh_log(DEBUG, ' Changing is_linked to TRUE for ' . $metafile->path);
                }
                $metafile->is_linked = TRUE;
                $metafiles[$key] = $metafile;
                $needs_save = TRUE;
            }
        }
    }

    $ok_metafiles = array();
    foreach ($metafiles as $key => $metafile) {
        if (get_storage_volume_from_path($metafile->path) === FALSE) {
            gh_log(WARN, "Found a metadata file pointing to a drive not defined in your storage pool: '$metafile->path'. Will mark it as Gone.");
            $metafile->state = 'Gone';
            $metafiles[$key] = $metafile;
            $needs_save = TRUE;
            continue;
        }
        // Entries in a state other than 'OK' are only returned when the caller asked for them.
        if ($load_nok_metafiles || $metafile->state == 'OK') {
            $ok_metafiles[$key] = $metafile;
        }
    }

    if ($needs_save) {
        save_metafiles($share, $path, $filename, array_values($metafiles));
    }

    if (!$quiet) {
        gh_log(DEBUG, " Got " . count($ok_metafiles) . " metadata files.");
    }
    return $ok_metafiles;
}
/**
 * Deletes every metadata file that exists for the given share file, across all metastores.
 *
 * Deletion is best-effort: unlink() errors are suppressed.
 *
 * @param string      $share    Share name.
 * @param string      $path     Directory of the file, relative to the share.
 * @param string|null $filename File name.
 */
function remove_metafiles($share, $path, $filename) {
    $path_suffix = !empty($path) ? "/$path" : "";
    $file_suffix = $filename !== null ? "/$filename" : "";
    gh_log(DEBUG, " Removing metadata files for $share" . $path_suffix . $file_suffix);

    $data_files = get_metafile_data_filenames($share, $path, $filename);
    foreach ($data_files as $data_file) {
        @unlink($data_file);
        gh_log(DEBUG, " Removed metadata file at $data_file");
        clearstatcache();
    }
}
/**
 * Persists the metafile list of a share file into the metastores.
 *
 * The serialized list is written to the metastore of every drive that holds at least one of the
 * listed file copies (and is Greyhole-owned); metadata files left in other metastores are removed.
 * When only one metastore received the data, an extra copy is written to a backup metastore
 * located on another drive, if one is configured.
 * An empty list removes all metadata files for that share file.
 *
 * @param string      $share     Share name.
 * @param string      $path      Directory of the file, relative to the share.
 * @param string|null $filename  File name.
 * @param array       $metafiles Metafile objects; each must expose a 'path' property.
 */
function save_metafiles($share, $path, $filename, $metafiles) {
    if (count($metafiles) == 0) {
        remove_metafiles($share, $path, $filename);
        return;
    }
    gh_log(DEBUG, " Saving " . count($metafiles) . " metadata files for " . clean_dir($share . (!empty($path) ? "/$path" : "") . ($filename!== null ? "/$filename" : "")));

    // The same payload is written everywhere; serialize it only once.
    $serialized_metafiles = serialize($metafiles);

    $paths_used = array();
    foreach (get_metastores() as $metastore) {
        $sp_drive = str_replace('/.gh_metastore', '', $metastore);
        $data_filepath = clean_dir("$metastore/$share/$path");
        $has_metafile = FALSE;
        foreach ($metafiles as $metafile) {
            if (get_storage_volume_from_path($metafile->path) == $sp_drive && is_greyhole_owned_dir($sp_drive)) {
                gh_mkdir($data_filepath, get_share_landing_zone($share) . "/$path");
                gh_log(DEBUG, " Saving metadata in " . clean_dir("$data_filepath/$filename"));
                if (is_dir("$data_filepath/$filename")) {
                    // A directory is in the way of the metadata file; get rid of it.
                    exec("rm -rf " . escapeshellarg("$data_filepath/$filename"));
                }
                file_put_contents("$data_filepath/$filename", $serialized_metafiles);
                $has_metafile = TRUE;
                $paths_used[] = $data_filepath;
                // One metadata file per metastore is enough.
                break;
            }
        }
        if (!$has_metafile && file_exists("$data_filepath/$filename")) {
            // This drive no longer holds a copy of the file; its metadata file is stale.
            unlink("$data_filepath/$filename");
        }
    }

    if (count($paths_used) == 1) {
        // Also save a backup on another drive
        global $metastore_backup_drives;
        if (is_array($metastore_backup_drives) && count($metastore_backup_drives) > 0) {
            $metastore_backup_drive = FALSE;
            $first_backup_as_metastore = str_replace('.gh_metastore_backup', '.gh_metastore', $metastore_backup_drives[0]);
            if (mb_strpos($paths_used[0], $first_backup_as_metastore) === FALSE) {
                $metastore_backup_drive = $metastore_backup_drives[0];
            } else if (isset($metastore_backup_drives[1])) {
                // The first backup metastore is on the drive we just wrote to; use the second one.
                $metastore_backup_drive = $metastore_backup_drives[1];
            }
            if ($metastore_backup_drive === FALSE) {
                // No second backup drive is defined. Writing to an undefined entry would target
                // "/$share/$path" at the filesystem root, so skip the backup instead.
                gh_log(DEBUG, " No backup metastore available on another drive; not saving a backup metadata file.");
                return;
            }
            $data_filepath = "$metastore_backup_drive/$share/$path";
            gh_log(DEBUG, " Saving backup metadata file in $data_filepath/$filename");
            gh_mkdir($data_filepath, get_share_landing_zone($share) . "/$path");
            file_put_contents("$data_filepath/$filename", $serialized_metafiles);
        }
    }
}
/**
 * Prunes redundant rows from the tasks table.
 *
 * For every 'write' task that is still locked (complete = 'no'), deletes the completed
 * 'write' tasks queued for the same share and path.
 * Sets the global $action to 'simplify_tasks'.
 */
function simplify_tasks() {
    global $action;
    $action = 'simplify_tasks';
    gh_log(DEBUG, "Simplifying pending tasks.");

    // Remove locked write tasks
    $select_sql = "SELECT share, full_path FROM tasks WHERE action = 'write' and complete = 'no'";
    $locked_tasks = db_query($select_sql) or gh_log(CRITICAL, "Can't select locked write tasks: " . db_error());

    $delete_template = "DELETE FROM tasks WHERE action = 'write' and complete = 'yes' AND share = '%s' AND full_path = '%s'";
    while ($locked_task = db_fetch_object($locked_tasks)) {
        $escaped_share = db_escape_string($locked_task->share);
        $escaped_path = db_escape_string($locked_task->full_path);
        $delete_sql = sprintf($delete_template, $escaped_share, $escaped_path);
        db_query($delete_sql) or gh_log(CRITICAL, "Can't delete duplicate of locked write task: " . db_error());
    }
}
/**
 * Reads new lines from the Samba log file and queues a task for each file operation found.
 *
 * Only lines containing ' smbd_greyhole:' are parsed; their fields are separated by '*':
 * action, share, action-specific fields, and a trailing result field.
 * Reading resumes from the offset stored in the 'last_read_log_smbd_line' setting, which is
 * updated as lines are consumed.
 * 'close' events are not applied immediately: the matching write task is only marked complete
 * once a different (non-duplicate) line is seen, or when parsing ends.
 *
 * @param bool $simplify_after_parse When TRUE, simplify_tasks() may be run after new tasks were queued.
 */
function parse_samba_log($simplify_after_parse=TRUE) {
    global $samba_log_file, $action, $trash_share_names, $max_queued_tasks;
    if (empty($samba_log_file)) {
        return;
    }
    $old_action = $action;
    $action = 'parse_logs';

    // If we have enough queued tasks (90% of $max_queued_tasks), let's not parse the log at this time, and get some work done.
    // Once we fall below that, we'll queue up to at most $max_queued_tasks new tasks, then get back to work.
    // This will effectively 'batch' large file operations to make sure the DB doesn't become a problem because of the number of rows,
    // and this will allow the end-user to see real activity, other than new rows in greyhole.tasks...
    $query = "SELECT COUNT(*) num_rows FROM tasks";
    $result = db_query($query) or gh_log(CRITICAL, "Can't get tasks count: " . db_error());
    $row = db_fetch_object($result);
    db_free_result($result);
    $num_rows = (int) $row->num_rows;
    if ($num_rows >= ($max_queued_tasks * 0.9)) {
        $action = $old_action;
        gh_log(DEBUG, " More than " . ($max_queued_tasks * 0.9) . " tasks queued... Won't queue any more at this time.");
        return;
    }

    $setting = Settings::get('last_read_log_smbd_line');
    if ($setting === FALSE) {
        gh_log(WARN, "Received no rows when querying settings for 'last_read_log_smbd_line'; expected one.");
        $setting = Settings::set('last_read_log_smbd_line', 0);
    }
    $f_seek_point = (int) $setting->value;

    clearstatcache();
    $samba_log_size = gh_filesize($samba_log_file);
    if ($f_seek_point > $samba_log_size) {
        // The log is smaller than where we stopped last time (presumably rotated); start over.
        gh_log(DEBUG, "Log file size = $samba_log_size; forcing seek point to 0.");
        $f_seek_point = 0;
    }

    $fp = fopen($samba_log_file, 'r') or gh_log(CRITICAL, "Can't open Samba log file '$samba_log_file' for reading.");
    fseek($fp, $f_seek_point);

    $new_tasks = 0;
    $last_line = FALSE;
    $act = FALSE;
    // Compare against FALSE explicitly, so a line that evaluates to false (e.g. "0") doesn't end the loop early.
    while (($line = fgets($fp)) !== FALSE) {
        if ($last_line === FALSE) {
            gh_log(DEBUG, "Parsing Samba logs...");
        }
        if (($p = mb_strpos($line, ' smbd_greyhole:')) === FALSE) {
            // Not one of our lines; it still breaks any run of duplicates.
            $last_line = 0;
        } else {
            $line = trim(mb_substr($line, $p+16));

            // Prevent insertion of unneeded duplicates
            if ($line === $last_line) {
                continue;
            }

            $line_ar = explode('*', $line);
            if (count($line_ar) == 1) {
                continue;
            }

            $last_line = $line;

            // Close logs are only processed when no more duplicates are found, so we'll execute this now that a non-duplicate line was found.
            if ($act === 'close') {
                $query = sprintf("UPDATE tasks SET additional_info = NULL, complete = 'yes' WHERE complete = 'no' AND share = '%s' AND additional_info = '%s'",
                    db_escape_string($share),
                    db_escape_string($fd)
                );
                db_query($query) or gh_log(CRITICAL, "Error updating tasks (1): " . db_error() . "; Query: $query");
            }

            $line = $line_ar;
            $act = array_shift($line);
            $share = array_shift($line);
            if ($act == 'mkdir') {
                // Nothing to do with those
                continue;
            }
            $result = array_pop($line);
            if (mb_strpos($result, 'failed') === 0) {
                gh_log(DEBUG, "Failed $act in $share/$line[0]. Skipping.");
                // Forget the action: a failed 'close' must not trigger the deferred UPDATE above/below,
                // which would otherwise run with the file descriptor of an earlier line.
                $act = FALSE;
                continue;
            }
            unset($fullpath);
            unset($fullpath_target);
            unset($fd);
            switch ($act) {
                case 'open':
                    $fullpath = array_shift($line);
                    $fd = array_shift($line);
                    $act = 'write';
                    break;
                case 'rmdir':
                case 'unlink':
                    $fullpath = array_shift($line);
                    break;
                case 'rename':
                    $fullpath = array_shift($line);
                    $fullpath_target = array_shift($line);
                    break;
                case 'close':
                    $fd = array_shift($line);
                    break;
                default:
                    $act = FALSE;
            }
            if ($act === FALSE) {
                continue;
            }

            // Close logs are only processed when no more duplicates are found, so we won't execute it just yet; we'll process it the next time we find a non-duplicate line.
            if ($act != 'close') {
                if (isset($fd) && $fd == -1) {
                    continue;
                }
                // In trash shares, only deletions are queued.
                if ($act != 'unlink' && $act != 'rmdir' && array_search($share, $trash_share_names) !== FALSE) { continue; }
                $new_tasks++;
                // additional_info holds the rename target, or the file descriptor of an open; values coming from the log are escaped.
                $query = sprintf("INSERT INTO tasks (action, share, full_path, additional_info, complete) VALUES ('%s', '%s', %s, %s, '%s')",
                    $act,
                    db_escape_string($share),
                    isset($fullpath) ? "'".db_escape_string(clean_dir($fullpath))."'" : 'NULL',
                    isset($fullpath_target) ? "'".db_escape_string(clean_dir($fullpath_target))."'" : (isset($fd) ? "'".db_escape_string($fd)."'" : 'NULL'),
                    $act == 'write' ? 'no' : 'yes'
                );
                db_query($query) or gh_log(CRITICAL, "Error inserting task: " . db_error() . "; Query: $query");
            }

            update_last_read_log_smbd_line($fp);

            // If we have enough queued tasks ($max_queued_tasks), let's stop parsing the log, and get some work done.
            if ($num_rows+$new_tasks >= $max_queued_tasks) {
                gh_log(DEBUG, " We now have more than $max_queued_tasks tasks queued... Will stop parsing for now.");
                break;
            }
        }
    }
    update_last_read_log_smbd_line($fp);
    fclose($fp);

    // Close logs are only processed when no more duplicates are found, so we'll execute this now that we're done parsing the current log.
    if ($act === 'close') {
        $query = sprintf("UPDATE tasks SET additional_info = NULL, complete = 'yes' WHERE complete = 'no' AND share = '%s' AND additional_info = '%s'",
            db_escape_string($share),
            db_escape_string($fd)
        );
        db_query($query) or gh_log(CRITICAL, "Error updating tasks (2): " . db_error() . "; Query: $query");
    }

    if ($last_line !== FALSE) {
        gh_log(DEBUG, " Done parsing.");
    }

    if ($simplify_after_parse && $new_tasks > 0) {
        $query = "SELECT COUNT(*) num_rows FROM tasks";
        $result = db_query($query) or gh_log(CRITICAL, "Can't get tasks count: " . db_error());
        $row = db_fetch_object($result);
        db_free_result($result);
        $num_rows = (int) $row->num_rows;
        if ($num_rows < 1000 || $num_rows % 5 == 0) { // Runs 1/5 of the times when num_rows > 1000
            if ($num_rows < 5000 || $num_rows % 100 == 0) { // Runs 1/100 of the times when num_rows > 5000
                simplify_tasks();
            }
        }
    }
    $action = $old_action;
}
/**
 * Stores the current read offset of the Samba log in the 'last_read_log_smbd_line' setting.
 *
 * @param resource $fp Open handle on the Samba log file.
 */
function update_last_read_log_smbd_line($fp) {
    Settings::set('last_read_log_smbd_line', ftell($fp));
}
/**
 * Reads the spooled Samba events in /var/spool/greyhole and queues a task for each file operation found.
 *
 * parse_samba_log() is called first, for installations that still log to syslog.
 * Each spool file holds one event, with fields separated by newlines: action, share,
 * action-specific fields, and a trailing result field. Files are processed in numeric order
 * and deleted once handled.
 * 'close' events are not applied immediately: the matching write task is only marked complete
 * once a different (non-duplicate) event is seen, or when processing ends.
 *
 * @param bool $simplify_after_parse When TRUE, simplify_tasks() may be run after new tasks were queued.
 */
function parse_samba_spool($simplify_after_parse=TRUE) {
    global $action, $trash_share_names, $max_queued_tasks;

    // Let's parse syslog still... just in case people are still using that.
    parse_samba_log($simplify_after_parse);

    $old_action = $action;
    $action = 'read_smb_spool';

    // If we have enough queued tasks (90% of $max_queued_tasks), let's not parse the log at this time, and get some work done.
    // Once we fall below that, we'll queue up to at most $max_queued_tasks new tasks, then get back to work.
    // This will effectively 'batch' large file operations to make sure the DB doesn't become a problem because of the number of rows,
    // and this will allow the end-user to see real activity, other than new rows in greyhole.tasks...
    $query = "SELECT COUNT(*) num_rows FROM tasks";
    $result = db_query($query) or gh_log(CRITICAL, "Can't get tasks count: " . db_error());
    $row = db_fetch_object($result);
    db_free_result($result);
    $num_rows = (int) $row->num_rows;
    if ($num_rows >= ($max_queued_tasks * 0.9)) {
        $action = $old_action;
        // Only log this occasionally, to avoid flooding the log.
        if (time() % 10 == 0) {
            gh_log(DEBUG, " More than " . ($max_queued_tasks * 0.9) . " tasks queued... Won't queue any more at this time.");
        }
        return;
    }

    $new_tasks = 0;
    $last_line = FALSE;
    $act = FALSE;
    // Spool file handled in the previous iteration; it is deleted when moving on to the next one.
    $last_filename = FALSE;
    while (TRUE) {
        $files = array();
        $space_left_in_queue = $max_queued_tasks - $num_rows - $new_tasks;
        exec('ls -1 /var/spool/greyhole | sort -n 2> /dev/null | head -' . $space_left_in_queue, $files);
        if (count($files) == 0) {
            break;
        }

        if ($last_line === FALSE) {
            gh_log(DEBUG, "Processing Samba spool...");
        }

        foreach ($files as $filename) {
            if ($last_filename !== FALSE) {
                @unlink($last_filename);
            }

            $filename = "/var/spool/greyhole/$filename";
            $last_filename = $filename;

            $line = file_get_contents($filename);

            // Prevent insertion of unneeded duplicates
            if ($line === $last_line) {
                continue;
            }

            $line_ar = explode("\n", $line);

            $last_line = $line;

            // Close logs are only processed when no more duplicates are found, so we'll execute this now that a non-duplicate line was found.
            if ($act === 'close') {
                $query = sprintf("UPDATE tasks SET additional_info = NULL, complete = 'yes' WHERE complete = 'no' AND share = '%s' AND additional_info = '%s'",
                    db_escape_string($share),
                    db_escape_string($fd)
                );
                db_query($query) or gh_log(CRITICAL, "Error updating tasks (1): " . db_error() . "; Query: $query");
            }

            $line = $line_ar;
            $act = array_shift($line);
            $share = array_shift($line);
            if ($act == 'mkdir') {
                // Nothing to do with those
                continue;
            }
            $result = array_pop($line);
            if (mb_strpos($result, 'failed') === 0) {
                gh_log(DEBUG, "Failed $act in $share/$line[0]. Skipping.");
                // Forget the action: a failed 'close' must not trigger the deferred UPDATE above/below,
                // which would otherwise run with the file descriptor of an earlier event.
                $act = FALSE;
                continue;
            }
            unset($fullpath);
            unset($fullpath_target);
            unset($fd);
            switch ($act) {
                case 'open':
                    $fullpath = array_shift($line);
                    $fd = array_shift($line);
                    $act = 'write';
                    break;
                case 'rmdir':
                case 'unlink':
                    $fullpath = array_shift($line);
                    break;
                case 'rename':
                    $fullpath = array_shift($line);
                    $fullpath_target = array_shift($line);
                    break;
                case 'close':
                    $fd = array_shift($line);
                    break;
                default:
                    $act = FALSE;
            }
            if ($act === FALSE) {
                continue;
            }

            // Close logs are only processed when no more duplicates are found, so we won't execute it just yet; we'll process it the next time we find a non-duplicate line.
            if ($act != 'close') {
                if (isset($fd) && $fd == -1) {
                    continue;
                }
                // In trash shares, only deletions are queued.
                if ($act != 'unlink' && $act != 'rmdir' && array_search($share, $trash_share_names) !== FALSE) { continue; }
                $new_tasks++;
                // additional_info holds the rename target, or the file descriptor of an open; values coming from the spool are escaped.
                $query = sprintf("INSERT INTO tasks (action, share, full_path, additional_info, complete) VALUES ('%s', '%s', %s, %s, '%s')",
                    $act,
                    db_escape_string($share),
                    isset($fullpath) ? "'".db_escape_string(clean_dir($fullpath))."'" : 'NULL',
                    isset($fullpath_target) ? "'".db_escape_string(clean_dir($fullpath_target))."'" : (isset($fd) ? "'".db_escape_string($fd)."'" : 'NULL'),
                    $act == 'write' ? 'no' : 'yes'
                );
                db_query($query) or gh_log(CRITICAL, "Error inserting task: " . db_error() . "; Query: $query");
            }

            // If we have enough queued tasks ($max_queued_tasks), let's stop parsing the log, and get some work done.
            if ($num_rows+$new_tasks >= $max_queued_tasks) {
                gh_log(DEBUG, " We now have more than $max_queued_tasks tasks queued... Will stop parsing for now.");
                break;
            }
        }
        // Delete the last file handled in this batch.
        if ($last_filename !== FALSE) {
            @unlink($last_filename);
            $last_filename = FALSE;
        }
        if ($num_rows+$new_tasks >= $max_queued_tasks) {
            break;
        }
    }

    // Close logs are only processed when no more duplicates are found, so we'll execute this now that we're done parsing the current log.
    if ($act === 'close') {
        $query = sprintf("UPDATE tasks SET additional_info = NULL, complete = 'yes' WHERE complete = 'no' AND share = '%s' AND additional_info = '%s'",
            db_escape_string($share),
            db_escape_string($fd)
        );
        db_query($query) or gh_log(CRITICAL, "Error updating tasks (2): " . db_error() . "; Query: $query");
    }

    if ($new_tasks > 0) {
        gh_log(DEBUG, "Found $new_tasks new tasks in spool.");

        if ($simplify_after_parse) {
            $query = "SELECT COUNT(*) num_rows FROM tasks";
            $result = db_query($query) or gh_log(CRITICAL, "Can't get tasks count: " . db_error());
            $row = db_fetch_object($result);
            db_free_result($result);
            $num_rows = (int) $row->num_rows;
            if ($num_rows < 1000 || $num_rows % 5 == 0) { // Runs 1/5 of the times when num_rows > 1000
                if ($num_rows < 5000 || $num_rows % 100 == 0) { // Runs 1/100 of the times when num_rows > 5000
                    simplify_tasks();
                }
            }
        }
    }
    $action = $old_action;
}
/**
 * Returns the free space of each storage pool drive, as reported by $df_command.
 *
 * Results are cached in the globals $last_dfs / $last_df_time for $df_cache_time seconds.
 * Each drive is matched to the longest mount point that is a prefix of its path.
 *
 * @return array Map of storage pool drive => free space (float, in the unit printed by $df_command).
 *               The value is NULL when no mount point matched the drive, so callers can use
 *               isset() to detect drives whose free space is unknown.
 */
function get_free_space_in_storage_pool_drives() {
    global $storage_pool_drives, $df_command, $last_df_time, $last_dfs, $df_cache_time;
    if ($last_df_time > time() - $df_cache_time) {
        return $last_dfs;
    }

    $responses = array();
    exec($df_command, $responses);

    // Keep only the lines that end with: <number> <number> <number> <percent>% <mount point>,
    // and remember the third number (free space) for each mount point.
    $mounts = array();
    foreach ($responses as $line) {
        if (preg_match("@\s+[0-9]+\s+[0-9]+\s+([0-9]+)\s+[0-9]+%\s+(.+)$@", $line, $regs)) {
            $mounts[] = array($regs[2], (float) $regs[1]);
        }
    }

    $dfs = array();
    foreach ($storage_pool_drives as $sp_drive) {
        // Reset for every drive; otherwise a drive without a matching mount point would
        // inherit the free space found for the previous drive.
        $target_dir = '';
        $target_freespace = NULL;
        foreach ($mounts as $mount_info) {
            list($mount, $free_space) = $mount_info;
            // The longest matching mount point wins (e.g. /mnt/hdd1 over /).
            if (mb_strpos($sp_drive, $mount) === 0 && mb_strlen($mount) > mb_strlen($target_dir)) {
                $target_dir = $mount;
                $target_freespace = $free_space;
            }
        }
        $dfs[$sp_drive] = $target_freespace;
    }

    $last_df_time = time();
    $last_dfs = $dfs;
    return $dfs;
}
/**
 * Builds the ordered list of storage pool drives that can receive a file of the given size.
 *
 * Drives are split in three groups:
 *  - drives with enough available space (free space minus the configured minimum free space);
 *  - "last resort" drives, which have enough free space, but only by going below their minimum;
 *  - full drives.
 * The first two groups are ordered by the share's dir_selection_algorithm objects, and returned
 * in that order; full drives are appended only when $include_full_drives is TRUE.
 * When all drives are over-capacity, a notification email is sent, at most once a day.
 * When the file lives in a directory listed in $sticky_files, the drives that directory sticks
 * into are moved to the front of the list, and the global $is_sticky is set to TRUE.
 *
 * @param float  $filesize_kb         Size of the file to store, in KB.
 * @param bool   $include_full_drives Whether full drives should be appended to the result.
 * @param string $share               Share name.
 * @param string $path                Directory of the file, relative to the share.
 * @param string $log_prefix          Prefix added to the DEBUG log lines.
 * @return array List of storage pool drive paths, best candidates first.
 */
function order_target_drives($filesize_kb, $include_full_drives, $share, $path, $log_prefix='') {
    global $storage_pool_drives, $minimum_free_space_pool_drives, $last_OOS_notification, $sticky_files, $shares_options;

    $sorted_target_drives = array();
    $last_resort_sorted_target_drives = array();
    $full_drives = array();

    $dfs = get_free_space_in_storage_pool_drives();
    foreach ($storage_pool_drives as $sp_drive) {
        if (!isset($dfs[$sp_drive])) {
            if (!is_dir($sp_drive)) {
                gh_log(ERROR, "The directory at $sp_drive doesn't exist. This partition will never be used!");
                if (file_exists('/usr/bin/hdactl')) {
                    gh_log(ERROR, "You should de-select, then re-select this partition in your Amahi dashboard (http://hda), in the Shares > Storage Pool page, to fix this problem.");
                } else {
                    gh_log(ERROR, "See the INSTALL file for instructions on how to prepare partitions to include in your storage pool.");
                }
            } else {
                gh_log(ERROR, "Can't find how much free space is left on $sp_drive. This partition will never be used!");
                gh_log(ERROR, "Please report this using the 'Bugs Tracker' link found on http://greyhole.net. You should include the following information in your ticket:");
                gh_log(ERROR, "===== Error report starts here =====");
                gh_log(ERROR, "Unknown free space for partition: $sp_drive");
                global $df_command;
                gh_log(ERROR, "df_command: $df_command");
                unset($responses);
                exec($df_command, $responses);
                gh_log(ERROR, "Result of df_command: " . var_export($responses, TRUE));
                unset($responses);
                exec('df -k 2>&1', $responses);
                gh_log(ERROR, "Result of df -k: " . var_export($responses, TRUE));
                gh_log(ERROR, "===== Error report ends here =====");
            }
            continue;
        }
        if (!is_greyhole_owned_dir($sp_drive)) {
            continue;
        }
        $free_space = $dfs[$sp_drive];
        // The configured minimum is multiplied by 1024*1024 to be comparable with the free space and $filesize_kb.
        $minimum_free_space = (float) (isset($minimum_free_space_pool_drives[$sp_drive]) ? $minimum_free_space_pool_drives[$sp_drive]*1024*1024 : 0.0);
        $available_space = (float) $free_space - $minimum_free_space;
        if ($available_space <= $filesize_kb) {
            if ($free_space > $filesize_kb) {
                // The file fits, but only by using the reserved minimum free space.
                $last_resort_sorted_target_drives[$sp_drive] = $available_space;
            } else {
                $full_drives[$sp_drive] = $free_space;
            }
            continue;
        }
        $sorted_target_drives[$sp_drive] = $available_space;
    }

    // Let the share's drive selection algorithms order the candidate drives.
    $selection_algorithms = $shares_options[$share]['dir_selection_algorithm'];
    foreach ($selection_algorithms as $ds) {
        $ds->init($sorted_target_drives, $last_resort_sorted_target_drives);
    }
    $sorted_target_drives = array();
    $last_resort_sorted_target_drives = array();
    while (TRUE) {
        $num_empty_ds = 0;
        foreach ($selection_algorithms as $ds) {
            list($drives, $drives_last_resort) = $ds->draft();
            foreach ($drives as $k => $v) {
                $sorted_target_drives[$k] = $v;
            }
            foreach ($drives_last_resort as $k => $v) {
                $last_resort_sorted_target_drives[$k] = $v;
            }
            if (count($drives) == 0 && count($drives_last_resort) == 0) {
                $num_empty_ds++;
            }
        }
        if ($num_empty_ds == count($selection_algorithms)) {
            // All DS are empty; exit.
            break;
        }
    }

    // Email notification when all dirs are over-capacity
    if (count($sorted_target_drives) == 0) {
        gh_log(WARN, " Warning! All storage pool drives are over-capacity!");
        if (!isset($last_OOS_notification)) {
            $setting = Settings::get('last_OOS_notification');
            if ($setting === FALSE) {
                gh_log(WARN, "Received no rows when querying settings for 'last_OOS_notification'; expected one.");
                $setting = Settings::set('last_OOS_notification', 0);
            }
            $last_OOS_notification = $setting->value;
        }
        if ($last_OOS_notification < strtotime('-1 day')) {
            global $email_to;
            gh_log(INFO, " Sending email notification to $email_to");
            $hostname = exec('hostname');
            $body = "This is an automated email from Greyhole.
It appears all the defined storage pool drives are over-capacity.
You probably want to do something about this!
";
            foreach ($last_resort_sorted_target_drives as $sp_drive => $free_space) {
                $minimum_free_space = (int) (isset($minimum_free_space_pool_drives[$sp_drive]) ? $minimum_free_space_pool_drives[$sp_drive] : 0);
                $body .= "$sp_drive has " . number_format($free_space/1024/1024, 2) . " GB free; minimum specified in greyhole.conf: $minimum_free_space GB.\n";
            }
            mail($email_to, "Greyhole is out of space on $hostname!", $body);

            $last_OOS_notification = time();
            Settings::set('last_OOS_notification', $last_OOS_notification);
        }
    }

    global $log_level;
    if ($log_level >= DEBUG) {
        if (count($sorted_target_drives) > 0) {
            $log = $log_prefix ."Drives with available space: ";
            foreach ($sorted_target_drives as $d => $s) {
                $log .= "$d (" . bytes_to_human($s*1024, FALSE) . " avail) - ";
            }
            // Drop the trailing separator.
            gh_log(DEBUG, mb_substr($log, 0, mb_strlen($log)-2));
        }
        if (count($last_resort_sorted_target_drives) > 0) {
            $log = $log_prefix ."Drives with enough free space, but no available space: ";
            foreach ($last_resort_sorted_target_drives as $d => $s) {
                $log .= "$d (" . bytes_to_human($s*1024, FALSE) . " avail) - ";
            }
            gh_log(DEBUG, mb_substr($log, 0, mb_strlen($log)-2));
        }
        if (count($full_drives) > 0) {
            $log = $log_prefix ."Drives full: ";
            foreach ($full_drives as $d => $s) {
                $log .= "$d (" . bytes_to_human($s*1024, FALSE) . " free) - ";
            }
            gh_log(DEBUG, mb_substr($log, 0, mb_strlen($log)-2));
        }
    }

    $drives = array_merge(array_keys($sorted_target_drives), array_keys($last_resort_sorted_target_drives));
    if ($include_full_drives) {
        $drives = array_merge($drives, array_keys($full_drives));
    }

    if (isset($sticky_files)) {
        global $is_sticky;
        $is_sticky = FALSE;
        foreach ($sticky_files as $share_dir => $stick_into) {
            if (gh_wild_mb_strpos("$share/$path", $share_dir) !== 0) {
                continue;
            }
            $is_sticky = TRUE;

            $more_drives_needed = FALSE;
            if (count($stick_into) > 0) {
                // Stick files into specific drives: $stick_into
                // Let's check if those drives are listed in the config file!
                foreach ($stick_into as $key => $stick_into_dir) {
                    if (array_search($stick_into_dir, $storage_pool_drives) === FALSE) {
                        unset($stick_into[$key]);
                        $more_drives_needed = TRUE;
                    }
                }
            }
            if (count($stick_into) == 0 || $more_drives_needed) {
                if (mb_strpos($share_dir, '*') !== FALSE) {
                    // Contains a wildcard... In this case, we want each directory that matches the wildcard to have its own setting. Let's find this directory...
                    // For example, if $share_dir == 'Videos/Movies/*/*' and "$share/$path/" == "Videos/Movies/HD/La Vita e Bella/", we want to save a 'stick_into' setting for 'Videos/Movies/HD/La Vita e Bella/'
                    // Files in other subdirectories of Videos/Movies/HD/ could end up in other drives.
                    $needles = explode('*', $share_dir);
                    $sticky_dir = '';
                    $wild_part = "$share/$path/";
                    for ($i=0; $i<count($needles); $i++) {
                        $needle = $needles[$i];
                        if ($i == 0) {
                            $sticky_dir = $needle;
                            $wild_part = @str_replace_first($needle, '', $wild_part);
                        } else {
                            if ($needle == '') {
                                $needle = '/';
                            }
                            $small_wild_part = mb_substr($wild_part, 0, mb_strpos($wild_part, $needle)+mb_strlen($needle));
                            $sticky_dir .= $small_wild_part;
                            $wild_part = str_replace_first($small_wild_part, '', $wild_part);
                        }
                    }
                    $sticky_dir = trim($sticky_dir, '/');
                } else {
                    $sticky_dir = $share_dir;
                }

                // Stick files into any drives
                $setting_name = sprintf('stick_into-%s', $sticky_dir);
                $setting = Settings::get($setting_name);
                $stored_stick_into = FALSE;
                if ($setting !== FALSE) {
                    $stored_stick_into = unserialize($setting->value);
                }
                if (is_array($stored_stick_into)) {
                    $stick_into = array_merge($stick_into, $stored_stick_into);
                    // Let's check if those drives are listed in the config file!
                    $update_needed = FALSE;
                    foreach ($stick_into as $key => $stick_into_dir) {
                        if (array_search($stick_into_dir, $storage_pool_drives) === FALSE) {
                            unset($stick_into[$key]);
                            $update_needed = TRUE;
                        }
                    }
                    if ($update_needed) {
                        $value = serialize($stick_into);
                        Settings::set($setting_name, $value);
                    }
                } else {
                    // No usable setting yet (missing, or not a serialized array): remember the current drive order.
                    // The value is serialized here, since it is always read back using unserialize().
                    $value = serialize(array_merge($stick_into, $drives));
                    Settings::set($setting_name, $value);
                }
            }

            // Make sure the drives we want to use are not yet full
            $priority_drives = array();
            foreach ($stick_into as $stick_into_dir) {
                // $full_drives is keyed by drive path, so look the drive up by key.
                if (isset($full_drives[$stick_into_dir]) || array_search($stick_into_dir, $storage_pool_drives) === FALSE) {
                    continue;
                }
                $drive_index = array_search($stick_into_dir, $drives);
                if ($drive_index === FALSE) {
                    // Not a usable target (or already prioritized); using FALSE as an index would remove $drives[0].
                    continue;
                }
                unset($drives[$drive_index]);
                $priority_drives[] = $stick_into_dir;
            }
            $drives = array_merge($priority_drives, $drives);
            gh_log(DEBUG, $log_prefix . "Reordered drives, per sticky_files config: " . implode(' - ', $drives));
            // Only the first matching sticky_files entry is applied.
            break;
        }
    }

    return $drives;
}
/**
 * Recursively checks a directory: sub-directories are walked with gh_fsck(), and every other
 * entry is handed to gh_fsck_file() with 'landing_zone' as its source.
 *
 * Increments $fsck_report['landing_zone']['num_dirs'] for each directory entered.
 *
 * @param string       $path         Directory to check.
 * @param string       $share        Share name.
 * @param string|false $storage_path Passed through unchanged to gh_fsck_file() and recursive calls.
 */
function gh_fsck($path, $share, $storage_path = FALSE) {
    global $storage_pool_drives, $fsck_report;
    gh_log(DEBUG, "Entering $path");
    $fsck_report['landing_zone']['num_dirs']++;

    $dir_handle = opendir($path);
    if ($dir_handle === FALSE) {
        gh_log(ERROR, " Couldn't open $path to list content. Skipping...");
        return;
    }
    while (($entry = readdir($dir_handle)) !== FALSE) {
        if ($entry == '.' || $entry == '..') {
            continue;
        }
        $entry_path = "$path/$entry";
        // filetype() errors are suppressed; the type is then FALSE and the entry is handled as a non-directory.
        $entry_type = @filetype($entry_path);
        if ($entry_type == 'dir') {
            gh_fsck($entry_path, $share, $storage_path);
            continue;
        }
        gh_fsck_file($path, $entry, $entry_type, 'landing_zone', $share, $storage_path);
    }
    closedir($dir_handle);
}
/**
 * Recursively walks a metastore directory, and checks the files that only exist there.
 *
 * For each metafile found, the share and the landing zone path are derived from $path.
 * Metafiles whose file exists in the landing zone are skipped; the others are handed to
 * gh_fsck_file() with 'metastore' as their source.
 * Increments $fsck_report['metastore']['num_dirs'] for each sub-directory found.
 *
 * @param string $root  Root of the metastore.
 * @param string $path  Path below $root; "$root$path" is the directory to walk.
 * @param string $share Share name. Note: it is overwritten by the share derived from $path as soon
 *                      as a metafile is found, and that value is what later recursive calls receive.
 */
function gh_fsck_metastore($root, $path, $share) {
    global $fsck_report;
    gh_log(DEBUG, "Entering metastore $root$path");
    if (!is_dir("$root$path")) {
        gh_log(DEBUG, " Not a directory... Aborting.");
        return;
    }
    $handle = opendir("$root$path");
    if ($handle === FALSE) {
        // Without this check, readdir() on an invalid handle would never return FALSE, and the loop below would not end.
        gh_log(ERROR, " Couldn't open $root$path to list content. Skipping...");
        return;
    }
    while (($filename = readdir($handle)) !== FALSE) {
        if ($filename == '.' || $filename == '..') {
            continue;
        }
        if (@is_dir("$root$path/$filename")) {
            $fsck_report['metastore']['num_dirs']++;
            gh_fsck_metastore($root, "$path/$filename", $share);
            continue;
        }
        // Found a metafile
        $path_parts = explode('/', $path);
        // $path starts with a slash: drop the empty first part; the next part is the share name.
        array_shift($path_parts);
        $share = array_shift($path_parts);
        $landing_zone = get_share_landing_zone($share);
        $local_path = $landing_zone . '/' . implode('/', $path_parts);
        // If file exists in landing zone, we already fsck-ed it in gh_fsck(); let's not repeat ourselves, shall we?
        if (!file_exists("$local_path/$filename")) {
            gh_fsck_file($local_path, $filename, FALSE, 'metastore', $share);
        }
    }
    closedir($handle);
}
/**
 * Checks a single file of a share: counts it in the fsck report, locates its copies on the
 * storage pool drives, reconciles them with the metafiles, and then fixes the symlink,
 * creates missing copies or removes extra copies as needed.
 *
 * @param string      $path         Directory that contains the file; it is made relative to the share's
 *                                  landing zone, or to "$storage_path/$share" when $storage_path is given.
 * @param string      $filename     Name of the file inside $path.
 * @param string|bool $file_type    'file', 'link', or FALSE when the file is not present in the landing zone.
 * @param string      $source       Where the file was found; only the value 'metastore' is tested here.
 * @param string      $share        Name of the share the file belongs to.
 * @param string|bool $storage_path Storage volume path, or FALSE when $path is inside the landing zone.
 * @return void
 */
function gh_fsck_file($path, $filename, $file_type, $source, $share, $storage_path = FALSE) {
    global $storage_pool_drives, $fsck_report, $options, $action;

    $landing_zone = get_share_landing_zone($share);
    if ($storage_path === FALSE) {
        $file_path = trim(mb_substr($path, mb_strlen($landing_zone)+1), '/');
    } else {
        $file_path = trim(mb_substr($path, mb_strlen("$storage_path/$share")+1), '/');
    }

    // Share-relative path of the file, in the exact form used for tasks.full_path by the INSERTs below.
    // The same form must be used when looking up pending tasks, otherwise files at the root of a
    // share ("/name" vs "name") are never matched.
    $task_full_path = empty($file_path) ? $filename : clean_dir("$file_path/$filename");

    if ($source == 'metastore') {
        $fsck_report['metastore']['num_files']++;
    }
    if ($file_type !== FALSE) {
        $fsck_report['landing_zone']['num_files']++;
    }

    if ($file_type == 'file') {
        if ($storage_path === FALSE) {
            // Let's just add a 'write' task for this file; if it's a duplicate of an already pending task, it won't be processed twice, since the simplify function will remove such duplicates.
            gh_log(INFO, "$path/$filename is a file (not a symlink). Adding a new 'write' pending task for that file.");
            $query = sprintf("INSERT INTO tasks (action, share, full_path, complete) VALUES ('write', '%s', '%s', 'yes')",
                db_escape_string($share),
                db_escape_string($task_full_path)
            );
            db_query($query) or gh_log(CRITICAL, "Can't insert write task: " . db_error());
            return;
        }
    } else {
        if ($source == 'metastore') {
            if ($file_type == 'link' && !file_exists(readlink("$path/$filename"))) {
                // Link points to now gone copy; let's just remove it, and treat this as if the link was not there in the first place.
                unlink("$path/$filename");
                $file_type = FALSE;
            }
            if ($file_type === FALSE) {
                if ($action != 'fsck_file') {
                    // Maybe this file was removed after fsck started, and thus shouldn't be re-created here!
                    // We'll queue this file fsck (to restore the symlink) for when all other file operations have been executed.
                    // The fsck options string is escaped like every other value placed in the query.
                    $query = sprintf("INSERT INTO tasks (action, share, full_path, complete, additional_info) VALUES ('fsck_file', '%s', '%s', 'idle', '%s')",
                        db_escape_string($share),
                        db_escape_string($task_full_path),
                        db_escape_string(isset($options['all-fsck-options']) ? $options['all-fsck-options'] : '')
                    );
                    db_query($query) or gh_log(CRITICAL, "Can't insert fsck_file task: " . db_error());
                    gh_log(DEBUG, " Queuing a new fsck_file task for " . clean_dir("$share/$file_path/$filename"));
                    return;
                }
            }
        }
    }

    $file_metafiles = array();
    $num_ok = 0;
    // Valid copies, keyed by inode number; copies on "gone OK" drives are keyed by their path instead.
    $file_copies_inodes = array();

    if (get_metafile_data_filename($share, $file_path, $filename) === FALSE) {
        $full_path = "$path/$filename";
        // Check if this is a temporary file; if so, just delete it.
        if (is_temp_file($full_path)) {
            $fsck_report['temp_files'][] = $full_path;
            gh_recycle($full_path);
            return;
        }
        if ($storage_path !== FALSE) {
            if (@$options['find-orphans']) {
                gh_log(INFO, "$full_path is an orphaned file; we'll proceed to find all copies and symlink this file appropriately.");
                $fsck_report['orphaned']['num_orphans']++;
            } else {
                gh_log(INFO, "$full_path is an orphaned file, but we're not looking for orphans. For Greyhole to recognize this file, initiate a --fsck with the --find-orphaned-files option.");
                return;
            }
        }
    }

    // Look for this file on all available drives
    foreach ($storage_pool_drives as $sp_drive) {
        $clean_full_path = clean_dir("$sp_drive/$share/$file_path/$filename");
        $inode_number = @gh_fileinode($clean_full_path);
        if ($inode_number !== FALSE) {
            if (is_dir($clean_full_path)) {
                gh_log(DEBUG, "Found a directory that should be a file! Will try to remove it, if it's empty.");
                @rmdir($clean_full_path);
                continue;
            }
            gh_log(DEBUG, "Found $clean_full_path");
            if (!is_greyhole_owned_dir($sp_drive)) {
                gh_log(DEBUG, " Drive $sp_drive is not part of the Greyhole storage pool anymore. The above file will not be counted as a valid file copy, but can be used to create a new valid copy.");
                $file_metafiles[$clean_full_path] = (object) array('path' => $clean_full_path, 'is_linked' => FALSE, 'state' => 'Gone');
                if (!isset($original_file_path)) {
                    $original_file_path = $clean_full_path;
                }
            } else {
                $file_metafiles[$clean_full_path] = (object) array('path' => $clean_full_path, 'is_linked' => FALSE, 'state' => 'OK');
                $file_copies_inodes[$inode_number] = $clean_full_path;
                $num_ok++;
            }
            // Temp files leftovers of stopped Greyhole executions
            $temp_filename = get_temp_filename($clean_full_path);
            if (file_exists($temp_filename) && gh_is_file($temp_filename)) {
                gh_log(INFO, " Found temporary file $temp_filename ... deleting.");
                $fsck_report['temp_files'][] = $temp_filename;
                gh_recycle($temp_filename);
            }
        }
    }

    // Merge in what the metafiles say about this file's copies.
    foreach (get_metafiles($share, $file_path, $filename, TRUE) as $metafile_block) {
        foreach ($metafile_block as $metafile) {
            $inode_number = @gh_fileinode($metafile->path);
            if ($inode_number === FALSE) {
                $metafile->state = 'Gone';
                $metafile->is_linked = FALSE;
                $root_path = str_replace(clean_dir("/$share/$file_path/$filename"), '', $metafile->path);
                if (gone_ok($root_path)) {
                    // Let's not replace this copy yet...
                    $file_copies_inodes[$metafile->path] = $metafile->path;
                    $num_ok++;
                    $fsck_report['gone_ok']++;
                }
            } else if (is_dir($metafile->path)) {
                gh_log(DEBUG, "Found a directory that should be a file! Will try to remove it, if it's empty.");
                @rmdir($metafile->path);
                $metafile->state = 'Gone';
                $metafile->is_linked = FALSE;
                continue;
            } else {
                $metafile->state = 'OK';
                if (!isset($file_metafiles[$metafile->path])) {
                    $file_copies_inodes[$inode_number] = $metafile->path;
                    $num_ok++;
                }
            }
            $file_metafiles[clean_dir($metafile->path)] = $metafile;
        }
    }

    // Initialized here (not only inside the block below) because it is read further down
    // even when no copies were found.
    $found_linked_metafile = FALSE;
    if (count($file_copies_inodes) > 0) {
        // If no metafile is linked, link the 1st one
        foreach ($file_metafiles as $key => $metafile) {
            if ($metafile->is_linked) {
                if (file_exists($metafile->path)) {
                    $found_linked_metafile = TRUE;
                    $expected_file_size = gh_filesize($metafile->path);
                    $original_file_path = $metafile->path;
                    break;
                } else {
                    $metafile->is_linked = FALSE;
                    $metafile->state = 'Gone';
                }
            }
        }
        if (!$found_linked_metafile) {
            $first_metafile = reset($file_metafiles);
            $first_metafile->is_linked = TRUE;
            $expected_file_size = gh_filesize($first_metafile->path);
            $original_file_path = $first_metafile->path;
        }

        if (@$options['disk-usage-report']) {
            global $du;
            // Add this file's size to its directory and to every parent directory.
            $du_path = '/' . trim(clean_dir("/$share/$file_path"), '/');
            do {
                if (!isset($du[$du_path])) {
                    $du[$du_path] = 0;
                }
                $du[$du_path] += $expected_file_size;
                $du_path = mb_substr($du_path, 0, mb_strrpos($du_path, '/'));
            } while (!empty($du_path));
        }

        // Check that all file copies have the same size
        foreach ($file_copies_inodes as $key => $real_full_path) {
            if (array_search($real_full_path, array_keys($file_copies_inodes)) !== FALSE) {
                // That file isn't available atm, but it's OK.
                continue;
            }
            $file_size = gh_filesize($real_full_path);
            if ($file_size != $expected_file_size) {
                // Found a file with a different size than the original...
                // There might be a good reason. Let's look for one!
                if (real_file_is_locked($real_full_path) !== FALSE || real_file_is_locked($original_file_path) !== FALSE) {
                    // Write operation in progress
                    continue;
                }
                // A pending write transaction maybe?
                parse_samba_spool(FALSE);
                $query = sprintf("SELECT * FROM tasks WHERE action = 'write' AND share = '%s' AND full_path = '%s'",
                    db_escape_string($share),
                    db_escape_string($task_full_path)
                );
                $result = db_query($query) or gh_log(CRITICAL, "Can't select for pending writes: " . db_error());
                if (db_fetch_object($result) !== FALSE) {
                    // Pending write task
                    continue;
                }
                // Found no good reason!
                if ($file_size == 0) {
                    // Empty file; just delete it.
                    gh_log(WARN, " An empty file copy was found: $real_full_path is 0 bytes. Original: $original_file_path is " . number_format($expected_file_size) . " bytes. This empty copy will be deleted.");
                    unlink($real_full_path);
                } else {
                    gh_log(WARN, " A file copy with a different file size than the original was found: $real_full_path is " . number_format($file_size) . " bytes. Original: $original_file_path is " . number_format($expected_file_size) . " bytes.");
                    gh_recycle($real_full_path);
                    $fsck_report['wrong_file_size'][clean_dir($real_full_path)] = array($file_size, $expected_file_size, $original_file_path);
                }
                // Will not count that copy as a valid copy!
                unset($file_copies_inodes[$key]);
                unset($file_metafiles[clean_dir($real_full_path)]);
            }
        }
    }

    $num_copies_required = get_num_copies($share);
    if (count($file_copies_inodes) == $num_copies_required) {
        // It's okay if the file isn't a symlink so long as we're looking at a storage volume path and not a share path
        if (!$found_linked_metafile || ($file_type != 'link' && $storage_path === FALSE)) {
            // Re-create symlink...
            if (!$found_linked_metafile) {
                // ... the old one points to a drive that was replaced
                gh_log(INFO, ' Symlink target moved. Updating symlink.');
                $fsck_report['symlink_target_moved']++;
            } else {
                // ... it was missing
                gh_log(INFO, ' Symlink was missing. Creating new symlink.');
            }
            foreach ($file_metafiles as $key => $metafile) {
                if ($metafile->is_linked) {
                    gh_log(DEBUG, " Updating symlink at $landing_zone/$file_path/$filename to point to $metafile->path");
                    @unlink("$landing_zone/$file_path/$filename");
                    list($metafile_dir_path, $metafile_filename) = explode_full_path($metafile->path);
                    gh_mkdir("$landing_zone/$file_path", $metafile_dir_path);
                    symlink($metafile->path, "$landing_zone/$file_path/$filename");
                    break;
                }
            }
            save_metafiles($share, $file_path, $filename, array_values($file_metafiles));
        }
    } else if (count($file_copies_inodes) == 0 && !isset($original_file_path)) {
        gh_log(WARN, ' WARNING! No copies of this file are available in the Greyhole storage pool. ' . (is_link("$landing_zone/$file_path/$filename") ? 'Deleting from share.' : (gh_is_file("$landing_zone/$file_path/$filename") ? 'Did you copy that file there without using your Samba shares? (If you did, don\'t do that in the future.)' : '')));
        if ($source == 'metastore' || get_metafile_data_filename($share, $file_path, $filename) !== FALSE) {
            $fsck_report['no_copies_found_files'][clean_dir("$share/$file_path/$filename")] = TRUE;
        }
        if (is_link("$landing_zone/$file_path/$filename")) {
            gh_recycle("$landing_zone/$file_path/$filename");
        } else if (gh_is_file("$landing_zone/$file_path/$filename")) {
            gh_log(INFO, "$share/$file_path/$filename is a file (not a symlink). Adding a new 'write' pending task for that file.");
            $query = sprintf("INSERT INTO tasks (action, share, full_path, complete) VALUES ('write', '%s', '%s', 'yes')",
                db_escape_string($share),
                db_escape_string($task_full_path)
            );
            db_query($query) or gh_log(CRITICAL, "Can't insert write task: " . db_error());
        }
        if (@$options['del-orphaned-metadata']) {
            remove_metafiles($share, $file_path, $filename);
        } else {
            save_metafiles($share, $file_path, $filename, array_values($file_metafiles));
        }
    } else if (count($file_copies_inodes) < $num_copies_required) {
        // Create new copies
        gh_log(INFO, " Missing file copies. Expected $num_copies_required, got " . count($file_copies_inodes) . ". Will create more copies using $original_file_path");
        if (isset($fsck_report['missing_copies'])) {
            $fsck_report['missing_copies']++;
        }
        clearstatcache();
        $filesize = gh_filesize("$original_file_path");
        $file_metafiles = create_metafiles($share, "$file_path/$filename", $num_copies_required, $filesize, $file_metafiles);

        // Re-copy the file everywhere, and re-create the symlink
        $symlink_created = FALSE;
        $num_copies_current = 1; # the source file
        global $going_dir;
        if (!empty($going_dir)) {
            // Let's not count the source file here, since it will be gone soon!
            $num_copies_current = 0;
        }
        foreach ($file_metafiles as $key => $metafile) {
            if ($original_file_path != $metafile->path) {
                if ($num_copies_current >= $num_copies_required) {
                    $metafile->state = 'Gone';
                    $file_metafiles[$key] = $metafile;
                    continue;
                }
                list($metafile_dir_path, $metafile_filename) = explode_full_path($metafile->path);
                if ($metafile->state == 'Gone') {
                    // A 'Gone' copy that lives on a drive still owned by Greyhole can be re-created.
                    foreach ($storage_pool_drives as $sp_drive) {
                        if (get_storage_volume_from_path($metafile_dir_path) == $sp_drive && is_greyhole_owned_dir($sp_drive)) {
                            $metafile->state = 'Pending';
                            $file_metafiles[$key] = $metafile;
                            break;
                        }
                    }
                }
                if ($metafile->state != 'Gone') {
                    $root_path = str_replace(clean_dir("/$share/$file_path"), '', $metafile_dir_path);
                    list($original_path, $metafile_filename) = explode_full_path(get_share_landing_zone($share) . "/$file_path");
                    if (!gh_mkdir($metafile_dir_path, $original_path)) {
                        $metafile->state = 'Gone';
                        $file_metafiles[$key] = $metafile;
                        continue;
                    }
                }
                if (!is_dir($metafile_dir_path) || $metafile->state == 'Gone') {
                    continue;
                }
                if ($metafile->state == 'Pending') {
                    $source_size = gh_filesize($original_file_path);
                    gh_log(DEBUG, " Copying " . bytes_to_human($source_size, FALSE) . " file to $metafile->path");
                    // Copy to a temporary name first; it is renamed to the final path only once the sizes match.
                    $temp_path = get_temp_filename($metafile->path);
                    $start_time = time();
                    exec(get_copy_cmd($original_file_path, $temp_path));
                    $it_worked = file_exists($temp_path) && file_exists($original_file_path) && gh_filesize($temp_path) == gh_filesize($original_file_path);
                    if ($it_worked) {
                        if (time() - $start_time > 0) {
                            $speed = number_format($source_size/1024/1024 / (time() - $start_time), 1);
                            gh_log(DEBUG, " Copy created at $speed MBps.");
                        }
                        gh_rename($temp_path, $metafile->path);
                        gh_chperm($metafile->path, gh_get_file_infos($original_file_path));
                        $metafile->state = 'OK';
                        $num_copies_current++;
                    } else {
                        gh_log(WARN, " Failed file copy. Skipping.");
                        if ($metafile->is_linked) {
                            $metafile->is_linked = FALSE;
                        }
                        $metafile->state = 'Gone';
                    }
                    $file_metafiles[$key] = $metafile;
                }
            }
            if ($original_file_path == $metafile->path || $metafile->is_linked) {
                if (!empty($going_dir) && get_storage_volume_from_path($original_file_path) == $going_dir) {
                    $metafile->is_linked = FALSE;
                    $metafile->state = 'Gone';
                    $file_metafiles[$key] = $metafile;
                    continue;
                }
                if ($symlink_created /* already */) {
                    $metafile->is_linked = FALSE;
                    $file_metafiles[$key] = $metafile;
                    continue;
                }
                gh_log(DEBUG, " Updating symlink at $landing_zone/$file_path/$filename to point to $metafile->path");
                gh_recycle("$landing_zone/$file_path/$filename");
                list($metafile_dir_path, $metafile_filename) = explode_full_path($metafile->path);
                gh_mkdir("$landing_zone/$file_path", $metafile_dir_path);
                symlink($metafile->path, "$landing_zone/$file_path/$filename");
                $symlink_created = TRUE;
            }
        }
        if (!$symlink_created) {
            // Fall back to linking the first copy that is marked OK.
            foreach ($file_metafiles as $key => $metafile) {
                if ($metafile->state == 'OK') {
                    $metafile->is_linked = TRUE;
                    $file_metafiles[$key] = $metafile;
                    gh_log(DEBUG, " Updating symlink at $landing_zone/$file_path/$filename to point to $metafile->path");
                    gh_recycle("$landing_zone/$file_path/$filename");
                    list($metafile_dir_path, $metafile_filename) = explode_full_path($metafile->path);
                    gh_mkdir("$landing_zone/$file_path", $metafile_dir_path);
                    symlink($metafile->path, "$landing_zone/$file_path/$filename");
                    break;
                }
            }
        }
        save_metafiles($share, $file_path, $filename, array_values($file_metafiles));
    } else {
        # Let's not assume that files on missing drives are really there... Removing files here could be dangerous!
        foreach ($file_copies_inodes as $inode => $path) {
            if (mb_strpos($inode, '/') === 0) {
                unset($file_copies_inodes[$inode]);
            }
        }
        if (count($file_copies_inodes) > $num_copies_required) {
            gh_log(INFO, " Too many file copies. Expected $num_copies_required, got " . count($file_copies_inodes) . ". Will try to remove some.");
            if (file_is_locked($share, "$file_path/$filename") !== FALSE) {
                gh_log(INFO, " File is locked. Will not remove copies at this time. The next fsck will try to remove copies again.");
                return;
            }
            $fsck_report['too_many_copies']++;
            $local_target_drives = array_values(order_target_drives(0, TRUE, $share, $file_path));
            // Candidate drives are taken from the end of the ordered list.
            while (count($file_copies_inodes) > $num_copies_required && !empty($local_target_drives)) {
                $sp_drive = array_pop($local_target_drives);
                $key = clean_dir("$sp_drive/$share/$file_path/$filename");
                gh_log(DEBUG, " Looking for copy at $key");
                if (isset($file_metafiles[$key]) || gh_file_exists($key)) {
                    if (isset($file_metafiles[$key])) {
                        $metafile = $file_metafiles[$key];
                    }
                    if (gh_file_exists($key) || $metafile->state == 'OK') {
                        gh_log(DEBUG, " Found file copy at $key, or metadata file is marked OK.");
                        if (real_file_is_locked($key) !== FALSE) {
                            gh_log(DEBUG, " File copy is locked. Won't remove it.");
                            continue;
                        }
                        $fsck_report['too_many_files'][] = $key;
                        gh_log(DEBUG, " Removing copy at $key");
                        unset($file_copies_inodes[gh_fileinode($key)]);
                        gh_recycle($key);
                        if (isset($file_metafiles[$key])) {
                            unset($file_metafiles[$key]);
                        }
                        $num_ok--;
                    }
                }
            }
            // If no metafile is linked, link the 1st one
            $found_linked_metafile = FALSE;
            foreach ($file_metafiles as $key => $metafile) {
                if ($metafile->is_linked) {
                    $found_linked_metafile = TRUE;
                    break;
                }
            }
            if (!$found_linked_metafile) {
                $metafile = reset($file_metafiles);
                // reset() returns FALSE on an empty array; there is nothing to link in that case.
                if ($metafile !== FALSE) {
                    gh_log(DEBUG, " Updating symlink at $landing_zone/$file_path/$filename to point to $metafile->path");
                    gh_recycle("$landing_zone/$file_path/$filename");
                    list($metafile_dir_path, $metafile_filename) = explode_full_path($metafile->path);
                    gh_mkdir("$landing_zone/$file_path", $metafile_dir_path);
                    symlink($metafile->path, "$landing_zone/$file_path/$filename");
                    $metafile->is_linked = TRUE;
                }
            }
            save_metafiles($share, $file_path, $filename, array_values($file_metafiles));
        }
    }

    // Queue all file copies checksum calculations, if --checksums was specified
    if (@$options['verify-checksums']) {
        foreach (get_metafiles($share, $file_path, $filename, TRUE) as $metafile_block) {
            foreach ($metafile_block as $metafile) {
                if ($metafile->state != 'OK') { continue; }
                $inode_number = @gh_fileinode($metafile->path);
                if ($inode_number !== FALSE) {
                    // Let's calculate this file's MD5 checksum to validate that all copies are valid.
                    $query = sprintf("INSERT INTO tasks (action, share, full_path, additional_info, complete) VALUES ('md5', '%s', '%s', '%s', 'no')",
                        db_escape_string($share),
                        db_escape_string(clean_dir("$file_path/$filename")),
                        db_escape_string($metafile->path)
                    );
                    db_query($query) or gh_log(CRITICAL, "Can't insert md5 task: " . db_error());
                }
            }
        }
    }
}
/**
 * Resets the global fsck report to an empty report for a new run.
 *
 * @param mixed $what Stored as-is under the 'what' key of the report.
 * @return void
 */
function initialize_fsck_report($what) {
    global $fsck_report;

    // Counters start at zero and lists start empty; 'start' records when the run began.
    $fsck_report = array(
        'start' => time(),
        'what' => $what,
        'metastore' => array(
            'num_dirs' => 0,
            'num_files' => 0
        ),
        'orphaned' => array(
            'num_orphans' => 0
        ),
        'landing_zone' => array(
            'num_dirs' => 0,
            'num_files' => 0
        ),
        'no_copies_found_files' => array(),
        'symlink_target_moved' => 0,
        'too_many_copies' => 0,
        'too_many_files' => array(),
        'missing_copies' => 0,
        'wrong_file_size' => array(),
        'temp_files' => array(),
        'gone_ok' => 0
    );
}
/**
 * Builds the human-readable text of the fsck report from the global $fsck_report.
 *
 * Records the end time in $fsck_report['end'], then lists the scan summary, the trash size
 * of each storage pool drive, the problems found and the notices.
 *
 * @return string The report text.
 */
function get_fsck_report() {
    global $fsck_report, $storage_pool_drives, $gone_ok_drives, $options;

    $fsck_report['end'] = time();
    $displayable_duration = duration_to_human($fsck_report['end'] - $fsck_report['start']);

    // Summary header
    $report = "fsck report\n"
        . "-----------\n"
        . "Scanned directory: " . $fsck_report['what'] . "\n"
        . "Started: " . date('Y-m-d H:i:s', $fsck_report['start']) . "\n"
        . "Ended: " . date('Y-m-d H:i:s', $fsck_report['end']) . "\n"
        . "Duration: $displayable_duration\n"
        . "Metadata Store:\n"
        . "Found " . number_format($fsck_report['metastore']['num_dirs']) . " directories\n"
        . "Found " . number_format($fsck_report['metastore']['num_files']) . " files\n"
        . "Landing Zone (shares):\n"
        . "Found " . number_format($fsck_report['landing_zone']['num_dirs']) . " directories\n"
        . "Found " . number_format($fsck_report['landing_zone']['num_files']) . " files\n"
        . "Found " . number_format($fsck_report['orphaned']['num_orphans']) . " orphans\n"
        . "Trash size:\n";

    // One line per storage pool drive, with the size of its trash as reported by du.
    foreach ($storage_pool_drives as $dir) {
        $trash_path = clean_dir("$dir/.gh_trash");
        if (!is_dir($trash_path)) {
            $report .= " $trash_path = empty\n";
            continue;
        }
        $du_output = exec("du -sh " . escapeshellarg($trash_path) . " | awk '{print $1}'");
        $report .= " $trash_path = " . trim($du_output) . "\n";
    }

    // Errors
    $has_files_without_copies = !empty($fsck_report['no_copies_found_files']);
    $has_wrong_file_sizes = count($fsck_report['wrong_file_size']) > 0;
    if (!$has_files_without_copies && !$has_wrong_file_sizes) {
        $report .= "\nNo problems found.\n\n";
    } else {
        $report .= "\nProblems:\n";
        if ($has_files_without_copies) {
            ksort($fsck_report['no_copies_found_files']);
            $report .= " Found " . count($fsck_report['no_copies_found_files']) . " files in the metadata store for which no file copies were found.\n";
            if (@$options['del-orphaned-metadata']) {
                $report .= " Those metadata files have been deleted, since you used the --delete-orphaned-metadata option. They will not re-appear in the future.\n";
            } else {
                $report .= " Those files were removed from the Landing Zone. (i.e. those files are now gone!) They will re-appear in your shares if a copy re-appear and fsck is run.\n";
                $report .= " If you don't want to see those files listed here each time fsck runs, delete the corresponding files from the metadata store using \"greyhole --fsck --delete-orphaned-metadata\"\n";
            }
            $report .= " Files with no copies:\n";
            $report .= " " . implode("\n ", array_keys($fsck_report['no_copies_found_files'])) . "\n\n";
        }
        if ($has_wrong_file_sizes) {
            $report .= " Found " . count($fsck_report['wrong_file_size']) . " file copies with the wrong file size. Those files don't have the same file size as the original files available on your shares. The invalid copies have been moved into the trash.\n";
            foreach ($fsck_report['wrong_file_size'] as $real_file_path => $info_array) {
                // $info_array[0] is the size found; $info_array[1] is the expected size.
                $report .= " $real_file_path is " . number_format($info_array[0]) . " bytes; should be " . number_format($info_array[1]) . " bytes.\n";
            }
            $report .= "\n\n";
        }
    }

    // Warnings
    $has_notices = $fsck_report['too_many_copies'] != 0
        || $fsck_report['symlink_target_moved'] != 0
        || count($fsck_report['temp_files']) != 0
        || $fsck_report['gone_ok'] != 0;
    if ($has_notices) {
        $report .= "Notices:\n";
        if ($fsck_report['too_many_copies'] > 0) {
            $fsck_report['too_many_files'] = array_unique($fsck_report['too_many_files']);
            $report .= " Found " . $fsck_report['too_many_copies'] . " files for which there was too many file copies. Deleted (or moved in trash) files:\n";
            $report .= " " . implode("\n ", $fsck_report['too_many_files']) . "\n\n";
        }
        if ($fsck_report['symlink_target_moved'] > 0) {
            $report .= " Found " . $fsck_report['symlink_target_moved'] . " files in the Landing Zone that were pointing to a now gone copy.\n"
                . "Those symlinks were updated to point to the new location of those file copies.\n\n";
        }
        if (count($fsck_report['temp_files']) > 0) {
            $report .= " Found " . count($fsck_report['temp_files']) . " temporary files, which are leftovers of interrupted Greyhole executions. The following temporary files were deleted (or moved into the trash):\n";
            $report .= " " . implode("\n ", $fsck_report['temp_files']) . "\n\n";
        }
        if ($fsck_report['gone_ok'] > 0) {
            $report .= " Found " . $fsck_report['gone_ok'] . " missing files that are in a storage pool drive marked Temporarily-Gone.\n"
                . "If this directory is gone for good, you should execute the following command, and remove the directory from your configuration file:\n"
                . "greyhole --gone=path\n"
                . "where path is one of:\n";
            $report .= " " . implode("\n ", array_keys($gone_ok_drives)) . "\n\n";
        }
    }

    return $report;
}
/**
 * Removes a file: either moves it into the trash of its storage pool drive, or deletes it.
 *
 * Symlinks, files inside a share's landing zone, and files that are not on any storage pool
 * drive are always deleted. A file on a storage pool drive is moved to "<drive>/.gh_trash"
 * when its share has the 'delete_moves_to_trash' option enabled, and deleted otherwise.
 *
 * @param string $real_path Full path of the file or symlink to remove.
 * @return bool TRUE if the file is gone (or did not exist), FALSE if it could not be moved or deleted.
 */
function gh_recycle($real_path) {
    global $shares_options;

    clearstatcache();
    $is_symlink = is_link($real_path);
    if (!$is_symlink && !file_exists($real_path)) {
        // Nothing to remove.
        return TRUE;
    }

    $should_move_to_trash = FALSE;
    if (!$is_symlink && get_share_options_from_full_path($real_path) === FALSE) {
        // Not in a landing zone; the trash is only used for copies found on a storage pool drive.
        $storage_volume = get_storage_volume_from_path($real_path);
        if ($storage_volume !== FALSE) {
            $trash_path = "$storage_volume/.gh_trash";
            // Path relative to the drive; its first component is the share name.
            $full_path = trim(substr($real_path, strlen($storage_volume)), '/');
            $share = mb_substr($full_path, 0, mb_strpos($full_path, '/'));
            // The share might not be configured (anymore); treat that as "don't use the trash".
            $should_move_to_trash = isset($shares_options[$share]['delete_moves_to_trash'])
                && $shares_options[$share]['delete_moves_to_trash'];
        }
    }

    if ($should_move_to_trash) {
        // Move to trash
        $target_path = clean_dir("$trash_path/$full_path");
        list($path, $filename) = explode_full_path($target_path);
        if (@gh_is_file($path)) {
            // A file is in the way of the directory we need in the trash.
            unlink($path);
        }
        $dir_infos = (object) array(
            'fileowner' => 0,
            'filegroup' => 0,
            'fileperms' => (int) base_convert("0777", 8, 10)
        );
        gh_mkdir($path, $dir_infos);
        if (@is_dir($target_path)) {
            // A directory is in the way of the file we want to move into the trash.
            exec("rm -rf " . escapeshellarg($target_path));
        }
        if (@gh_rename($real_path, $target_path)) {
            gh_log(DEBUG, " Moved copy from $real_path to trash: $target_path");
            // Create a symlink in the Greyhole Trash share, to allow the user to remove this file using that share
            create_trash_share_symlink($target_path, $trash_path);
            return TRUE;
        }
    } else {
        if (@unlink($real_path)) {
            if (!$is_symlink) {
                gh_log(DEBUG, " Deleted copy at $real_path");
            }
            return TRUE;
        }
    }
    return FALSE;
}
/**
 * Runs REPAIR TABLE on the Greyhole tables when MySQL is in use.
 *
 * The tasks and settings tables are always repaired; tasks_completed is repaired only when
 * a simple SELECT on it fails. Does nothing when $db_use_mysql is not set or is falsy.
 *
 * @return void
 */
function repair_tables() {
    global $db_use_mysql, $action;

    if (empty($db_use_mysql)) {
        return;
    }

    if ($action == 'daemon') {
        gh_log(INFO, "Optimizing MySQL tables...");
    }

    if (!db_query("REPAIR TABLE tasks")) {
        gh_log(CRITICAL, "Can't repair tasks table: " . db_error());
    }
    if (!db_query("REPAIR TABLE settings")) {
        gh_log(CRITICAL, "Can't repair settings table: " . db_error());
    }

    // Let's repair tasks_completed only if it's broken!
    $probe = db_query("SELECT * FROM tasks_completed LIMIT 1");
    if ($probe === FALSE) {
        gh_log(INFO, "Repairing MySQL tables...");
        if (!db_query("REPAIR TABLE tasks_completed")) {
            gh_log(CRITICAL, "Can't repair tasks_completed table: " . db_error());
        }
    }
}
/**
 * Lists the metastores that are located on the given storage volume.
 *
 * @param string|bool $storage_volume Value to compare with get_storage_volume_from_path() of each metastore.
 * @return array Metastore paths, in the order returned by get_metastores().
 */
function get_metastores_from_storage_volume($storage_volume) {
    $matching = array();
    foreach (get_metastores() as $candidate) {
        if (get_storage_volume_from_path($candidate) != $storage_volume) {
            continue;
        }
        $matching[] = $candidate;
    }
    return $matching;
}
/**
 * Returns the paths of all usable metastores, caching the list in $global_metastores.
 *
 * The list holds "<drive>/.gh_metastore" for each storage pool drive that is_greyhole_owned_dir()
 * accepts, followed by each metastore backup drive whose parent directory is accepted.
 *
 * @param bool $use_cache When TRUE and a cached list exists, return it without rebuilding.
 * @return array List of metastore paths.
 */
function get_metastores($use_cache=TRUE) {
    global $storage_pool_drives, $metastore_backup_drives, $global_metastores;

    if ($use_cache && isset($global_metastores)) {
        return $global_metastores;
    }

    $found = array();
    foreach ($storage_pool_drives as $drive) {
        if (!is_greyhole_owned_dir($drive)) {
            continue;
        }
        $found[] = "$drive/.gh_metastore";
    }
    foreach ($metastore_backup_drives as $backup_path) {
        // The ownership check is done on the backup path with '/.gh_metastore_backup' removed.
        $backup_root = str_replace('/.gh_metastore_backup', '', $backup_path);
        if (!is_greyhole_owned_dir($backup_root)) {
            continue;
        }
        $found[] = $backup_path;
    }

    $global_metastores = $found;
    return $global_metastores;
}
/**
 * Finds the share whose landing zone contains the given path.
 *
 * When several landing zones are a prefix of the path (nested landing zones, or names such as
 * "/lz/Music" and "/lz/Music2"), the share with the longest landing zone wins. All shares are
 * therefore examined; stopping at the first match would return whichever share happens to be
 * listed first.
 *
 * @param string $full_path Full path to look up.
 * @return array|bool The options array of the matching share, or FALSE when no landing zone matches.
 */
function get_share_options_from_full_path($full_path) {
    global $shares_options;
    $landing_zone = '';
    $share = FALSE;
    foreach ($shares_options as $share_name => $share_options) {
        if (mb_strpos($full_path, $share_options['landing_zone']) === 0 && mb_strlen($share_options['landing_zone']) > mb_strlen($landing_zone)) {
            // Longest matching landing zone so far.
            $landing_zone = $share_options['landing_zone'];
            $share = $share_options;
        }
    }
    return $share;
}
/**
 * Finds the storage pool drive that contains the given path.
 *
 * When several drives are a prefix of the path (e.g. "/mnt/hdd1" and "/mnt/hdd10"), the
 * longest one wins. All drives are therefore examined; stopping at the first match would
 * return whichever drive happens to be listed first.
 *
 * @param string $full_path Full path to look up.
 * @return string|bool The matching entry of $storage_pool_drives, or FALSE when none matches.
 */
function get_storage_volume_from_path($full_path) {
    global $storage_pool_drives;
    $storage_volume = FALSE;
    $longest_path_found = 0;
    foreach ($storage_pool_drives as $sp_drive) {
        $drive_length = mb_strlen($sp_drive);
        if (mb_strpos($full_path, $sp_drive) === 0 && $drive_length > $longest_path_found) {
            // Longest matching drive so far.
            $storage_volume = $sp_drive;
            $longest_path_found = $drive_length;
        }
    }
    return $storage_volume;
}
/**
 * Finds the share that a path inside a metastore or a storage volume belongs to.
 *
 * If the path is inside a metastore (per get_metastore_from_path()), shares are matched on
 * "<metastore>/<share name>"; otherwise they are matched on "<storage volume>/<share name>".
 * When several shares match, the one with the longest landing zone is returned.
 *
 * @param string $full_path      Full path to look up.
 * @param string $storage_volume Storage volume used as the root when the path is not in a metastore.
 * @return array|bool The options array of the matching share, or FALSE when no share matches.
 */
function get_share_options_from_storage_volume($full_path, $storage_volume) {
    global $shares_options;

    // The metastore depends only on $full_path, so it is looked up once instead of once per share.
    $metastore = get_metastore_from_path($full_path);
    $root = ($metastore !== FALSE) ? $metastore : $storage_volume;

    $landing_zone = '';
    $share = FALSE;
    foreach ($shares_options as $share_name => $share_options) {
        if (mb_strpos($full_path, "$root/$share_name") === 0 && mb_strlen($share_options['landing_zone']) > mb_strlen($landing_zone)) {
            // Longest landing zone among the matching shares so far.
            $landing_zone = $share_options['landing_zone'];
            $share = $share_options;
        }
    }
    return $share;
}
/**
 * Returns the first metastore (in get_metastores() order) that the given path starts with.
 *
 * @param string $path Path to look up.
 * @return string|bool The metastore path, or FALSE when the path is not inside any metastore.
 */
function get_metastore_from_path($path) {
    foreach (get_metastores() as $candidate) {
        if (mb_strpos($path, $candidate) === 0) {
            return $candidate;
        }
    }
    return FALSE;
}
/**
 * Returns the owner, group and permissions of a file as an object.
 *
 * For a null or non-existent path, the defaults are owner 0, group 0 and permissions 0777.
 * For a symlink, the information of the link target (as returned by readlink()) is used.
 *
 * @param string|null $real_path Path of the file.
 * @return object Object with integer properties fileowner, filegroup and fileperms.
 */
function gh_get_file_infos($real_path) {
    $path_is_usable = $real_path != null && file_exists($real_path);
    if (!$path_is_usable) {
        $defaults = array(
            'fileowner' => 0,
            'filegroup' => 0,
            'fileperms' => (int) base_convert("0777", 8, 10)
        );
        return (object) $defaults;
    }

    $target = is_link($real_path) ? readlink($real_path) : $real_path;

    $infos = array(
        'fileowner' => (int) gh_fileowner($target),
        'filegroup' => (int) gh_filegroup($target),
        // gh_fileperms() is converted from octal notation to its integer value.
        'fileperms' => (int) base_convert(gh_fileperms($target), 8, 10)
    );
    return (object) $infos;
}
/**
 * Returns the permissions of a directory as an integer.
 *
 * @param string $directory Path passed to gh_fileperms().
 * @return int The gh_fileperms() result, converted from octal notation to its integer value.
 */
function gh_get_dir_perms($directory) {
    $octal_perms = gh_fileperms($directory);
    $decimal_perms = base_convert($octal_perms, 8, 10);
    return (int) $decimal_perms;
}
function gh_balance() { | |
global $storage_pool_drives, $shares_options, $is_sticky; | |
// Start with shares that have sticky files, so that subsequent shares will be used to try to balance what moving files into stick_into drives could debalance... | |
// Then start with the shares for which we keep the most # copies; | |
// That way, if the new drive fails soon, it won't take with it files for which we only have one copy! | |
$sorted_shares_options = $shares_options; | |
uasort($sorted_shares_options, 'compare_share_balance'); | |
foreach ($sorted_shares_options as $share_name => $share_options) { | |
if ($share_options['num_copies'] == count($storage_pool_drives)) { | |
// Files are everywhere; won't be able to use that share to balance available space! | |
continue; | |
} | |
if (@$skip_stickies && is_share_sticky($share_name)) { | |
gh_log(DEBUG, "Skipping sticky share $share_name."); | |
continue; | |
} | |
gh_log(DEBUG, "Balancing share: $share_name"); | |
// Move files from the drive with the less available space to the drive with the most available space. | |
$sorted_pool_drives = sort_storage_dirs_available_space(); | |
$pool_drives_avail_space = array(); | |
foreach ($sorted_pool_drives as $available_space => $drive) { | |
$pool_drives_avail_space[$drive] = $available_space; | |
} | |
$num_total_drives = count($sorted_pool_drives); | |
$balance_direction_asc = array(); | |
foreach ($sorted_pool_drives as $available_space => $drive) { | |
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space); | |
$balance_direction_asc[$drive] = $pool_drives_avail_space[$drive] < $target_avail_space; | |
} | |
foreach ($sorted_pool_drives as $source_drive) { | |
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space); | |
gh_log(DEBUG, "Balancing storage pool drive: $source_drive (". bytes_to_human($pool_drives_avail_space[$source_drive]*1024, FALSE) ." available, target: ". bytes_to_human($target_avail_space*1024, FALSE) .")"); | |
// Files candidate to get moved | |
$files = array(); | |
if (is_dir("$source_drive/$share_name")) { | |
exec("find ". escapeshellarg("$source_drive/$share_name") ." -type f -size +10M", $files); | |
} | |
gh_log(DEBUG, "Found ". count($files) ." files that can be moved."); | |
// Repeat until all drives' available space is balanced. | |
$file_everywhere_counter = 0; | |
foreach ($files as $file) { | |
// Let's not try to move locked files! | |
if (real_file_is_locked($file) !== FALSE) { | |
gh_log(DEBUG, " File $file is locked by another process. Skipping."); | |
continue; | |
} | |
$filesize = gh_filesize($file)/1024; // KB | |
$full_path = mb_substr($file, mb_strlen("$source_drive/$share_name/")); | |
list($path, $filename) = explode_full_path($full_path); | |
gh_log(DEBUG, " Working on file: $share_name/$full_path (". bytes_to_human($filesize*1024, FALSE) .")"); | |
$sp_drives = order_target_drives($filesize, FALSE, $share_name, $path, ' '); | |
unset($sp_drive); | |
if ($is_sticky) { | |
if (count($sp_drives) == $num_total_drives - 1 && array_search($source_drive, $sp_drives) === FALSE) { | |
// Only drive full is the source drive. Let's move files away from there! | |
} else if (count($sp_drives) < $num_total_drives) { | |
$skip_stickies = TRUE; | |
gh_log(DEBUG, " Some drives are full. Skipping sticky shares until all drives have some free space."); | |
break; | |
} | |
$sticky_drives = array_slice($sp_drives, 0, get_num_copies($share_name)); | |
if (array_search($source_drive, $sticky_drives)) { | |
// Source drive is a stick_into drive; let's not move that file! | |
gh_log(DEBUG, " Source is sticky. Skipping."); | |
continue; | |
} | |
$already_stuck_copies = 0; | |
foreach ($sticky_drives as $drive) { | |
if (file_exists("$drive/$share_name/$full_path")) { | |
$already_stuck_copies++; | |
} else { | |
$sp_drive = $drive; | |
} | |
} | |
} else { | |
while (count($sp_drives) > 0) { | |
$drive = array_shift($sp_drives); | |
if (!file_exists("$drive/$share_name/$full_path")) { | |
$sp_drive = $drive; | |
break; | |
} | |
} | |
} | |
if (!isset($sp_drive)) { | |
// Can't find a drive that doesn't have this file; skipping. | |
if ($is_sticky) { | |
gh_log(DEBUG, " Sticky file is already where it should be. Skipping."); | |
} | |
continue; | |
} | |
gh_log(DEBUG, " Target drive: $sp_drive (". bytes_to_human($pool_drives_avail_space[$sp_drive]*1024, FALSE) ." available)"); | |
if ($is_sticky) { | |
gh_log(DEBUG, " Moving sticky file, even if that means it won't help balancing available space."); | |
} else if ($pool_drives_avail_space[$source_drive]+$filesize+1*1024*1024 /*1GB*/ > $pool_drives_avail_space[$sp_drive]-$filesize | |
|| $pool_drives_avail_space[$source_drive]+$filesize > $target_avail_space | |
|| ($balance_direction_asc[$sp_drive] && $pool_drives_avail_space[$sp_drive] > $target_avail_space) | |
|| (!$balance_direction_asc[$sp_drive] && $pool_drives_avail_space[$sp_drive] < $target_avail_space)) { | |
gh_log(DEBUG, " Moving this file wouldn't help balancing available space. Skipping."); | |
#gh_log(DEBUG, " Reason 1: " . var_export($pool_drives_avail_space[$source_drive]+$filesize+1*1024*1024 /*1GB*/ > $pool_drives_avail_space[$sp_drive]-$filesize, TRUE)); | |
#gh_log(DEBUG, " Reason 2: " . var_export($pool_drives_avail_space[$source_drive]+$filesize > $target_avail_space, TRUE)); | |
#gh_log(DEBUG, " Reason 3: " . var_export($balance_direction_asc[$sp_drive] && $pool_drives_avail_space[$sp_drive] > $target_avail_space, TRUE)); | |
#gh_log(DEBUG, " Reason 4: " . var_export(!$balance_direction_asc[$sp_drive] && $pool_drives_avail_space[$sp_drive] < $target_avail_space, TRUE)); | |
#gh_log(DEBUG, " pool_drives_avail_space[source]+filesize+1GB = " . ($pool_drives_avail_space[$source_drive]+$filesize+1*1024*1024)); | |
#gh_log(DEBUG, " pool_drives_avail_space[source]+filesize = " . ($pool_drives_avail_space[$source_drive]+$filesize)); | |
#gh_log(DEBUG, " pool_drives_avail_space[target_drive] = " . ($pool_drives_avail_space[$sp_drive])); | |
#gh_log(DEBUG, " pool_drives_avail_space[target_drive]-filesize = " . ($pool_drives_avail_space[$sp_drive]-$filesize)); | |
#gh_log(DEBUG, " target_avail_space = " . ($target_avail_space)); | |
#gh_log(DEBUG, " balance_direction_asc[target_drive] = " . var_export($balance_direction_asc[$sp_drive], TRUE)); | |
continue; | |
} | |
// Make sure the parent directory exists, before we try moving something there... | |
$original_path = clean_dir("$source_drive/$share_name/$path"); | |
list($target_path, $filename) = explode_full_path("$sp_drive/$share_name/$full_path"); | |
gh_mkdir($target_path, $original_path); | |
// Move the file | |
$temp_path = get_temp_filename("$sp_drive/$share_name/$full_path"); | |
$file_infos = gh_get_file_infos($file); | |
gh_log(DEBUG, " Moving file copy..."); | |
$it_worked = gh_rename($file, $temp_path); | |
if ($it_worked) { | |
gh_rename($temp_path, "$sp_drive/$share_name/$full_path"); | |
gh_chperm("$sp_drive/$share_name/$full_path", $file_infos); | |
$pool_drives_avail_space[$sp_drive] -= $filesize; | |
$pool_drives_avail_space[$source_drive] += $filesize; | |
} else { | |
gh_log(WARN, " Failed file copy. Skipping."); | |
gh_recycle($temp_path); | |
continue; | |
} | |
// Update metafiles | |
foreach (get_metafiles($share_name, $path, $filename, FALSE, TRUE, FALSE) as $existing_metafiles){ | |
foreach ($existing_metafiles as $key => $metafile) { | |
if ($metafile->path == $file) { | |
$metafile->path = "$sp_drive/$share_name/$full_path"; | |
unset($existing_metafiles[$key]); | |
$metafile->state = 'OK'; | |
if ($metafile->is_linked) { | |
// Re-create correct symlink | |
$landing_zone = $share_options['landing_zone']; | |
if (is_link("$landing_zone/$full_path")) { | |
gh_log(DEBUG, " Updating symlink at $landing_zone/$full_path to point to $metafile->path"); | |
if (gh_recycle("$landing_zone/$full_path")) { | |
@symlink($metafile->path, "$landing_zone/$full_path"); | |
// Creating this symlink can fail if the parent dir was removed | |
} | |
} | |
} | |
$existing_metafiles[$metafile->path] = $metafile; | |
save_metafiles($share_name, $path, $filename, $existing_metafiles); | |
break; | |
} | |
} | |
} | |
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space); | |
gh_log(DEBUG, "Balancing storage pool drive: $source_drive (". bytes_to_human($pool_drives_avail_space[$source_drive]*1024, FALSE) ." available, target: ". bytes_to_human($target_avail_space*1024, FALSE) .")"); | |
} | |
gh_log(DEBUG, "Done balancing storage pool drive: $source_drive (". bytes_to_human($pool_drives_avail_space[$source_drive]*1024, FALSE) ." available)"); | |
} | |
gh_log(DEBUG, "Done balancing share: $share_name"); | |
} | |
if (@$skip_stickies) { | |
// We skipped some stickies... Let's re-balance to move those, and continue balancing. | |
$arr = debug_backtrace(); | |
if (count($arr) < 93) { | |
gh_log(DEBUG, "Some shares with sticky files were skipped. Balancing will now re-start to continue moving those sticky files as needed, and further balance. Recursion level = " . count($arr)); | |
gh_balance(); | |
} else { | |
gh_log(WARN, "Maximum number of consecutive balance reached. You'll need to re-execute --balance if you want to balance further."); | |
} | |
} | |
} | |
// Tells whether at least one key of the global $sticky_files array begins with $share_name.
// Returns FALSE when $sticky_files is not set, or when no key starts with the share name.
function is_share_sticky($share_name) {
    global $sticky_files;
    if (!isset($sticky_files)) {
        return FALSE;
    }
    foreach ($sticky_files as $sticky_dir => $unused_stick_into) {
        // A position of exactly 0 means the sticky directory starts with the share name.
        $starts_with_share = (mb_strpos($sticky_dir, $share_name) === 0);
        if ($starts_with_share) {
            return TRUE;
        }
    }
    return FALSE;
}
// Comparison callback for sorting share option arrays (each needs 'name' and 'num_copies').
// Ordering rules, in priority order:
//   1. shares for which is_share_sticky() is TRUE come first;
//   2. then shares with the higher 'num_copies' come first;
//   3. ties are broken by a case-insensitive comparison of 'name'.
function compare_share_balance($a, $b) {
    $a_is_sticky = is_share_sticky($a['name']);
    $b_is_sticky = is_share_sticky($b['name']);
    if ($a_is_sticky != $b_is_sticky) {
        // Exactly one of the two is sticky; that one sorts first.
        return $a_is_sticky ? -1 : 1;
    }
    if ($a['num_copies'] != $b['num_copies']) {
        return $a['num_copies'] > $b['num_copies'] ? -1 : 1;
    }
    return strcasecmp($a['name'], $b['name']);
}
// Builds the list of storage pool drives, sorted by ascending available space.
// Available space = free space reported by get_free_space_in_storage_pool_drives()
// minus the drive's entry in $minimum_free_space_pool_drives (multiplied by 1024*1024), if any.
// NOTE(review): values are presumably in KB (the balancing code multiplies them by 1024 before
// formatting them as bytes) - confirm against get_free_space_in_storage_pool_drives().
// Drives missing from the free-space report are left out.
// Returns an array of available_space => drive path; always an array, possibly empty.
function sort_storage_dirs_available_space() {
    global $storage_pool_drives, $minimum_free_space_pool_drives;
    $dfs = get_free_space_in_storage_pool_drives();
    // Start with an empty array: when no drive has free-space data, ksort() and the callers
    // (which iterate over the result) must still receive an array, not an undefined variable.
    $sorted_target_drives = array();
    foreach ($storage_pool_drives as $sp_drive) {
        if (!isset($dfs[$sp_drive])) {
            continue;
        }
        $free_space = $dfs[$sp_drive];
        $minimum_free_space = (float) (isset($minimum_free_space_pool_drives[$sp_drive]) ? $minimum_free_space_pool_drives[$sp_drive]*1024*1024 : 0.0);
        $available_space = (float) $free_space - $minimum_free_space;
        while (isset($sorted_target_drives[$available_space])) {
            // In case some drives have the exact same amount of available_space, make sure we return all drives!
            $available_space++;
        }
        $sorted_target_drives[$available_space] = $sp_drive;
    }
    ksort($sorted_target_drives);
    return $sorted_target_drives;
}
// Re-queues a task by inserting a copy of row $task_id at the end of the tasks table,
// with its 'complete' column set to $complete. The ID of the new row is appended to
// the global $sleep_before_task list.
function postpone_task($task_id, $complete='yes') {
    global $sleep_before_task;
    $template = "INSERT INTO tasks (action, share, full_path, additional_info, complete) SELECT action, share, full_path, additional_info, '%s' FROM tasks WHERE id = %d";
    $query = sprintf($template, db_escape_string($complete), $task_id);
    if (!db_query($query)) {
        gh_log(CRITICAL, "Error inserting postponed task: " . db_error());
    }
    $sleep_before_task[] = db_insert_id();
}
// Calls parse_config() until it succeeds.
// When running as the daemon on Amahi (detected by the presence of /usr/bin/hdactl), a failed
// parse waits 10 minutes and tries again: users might configure Greyhole later, and they should
// not see Greyhole reported as 'offline' until then.
// In every other case, a failed parse is logged at the CRITICAL level.
// NOTE(review): gh_log(CRITICAL, ...) presumably terminates the process - confirm in gh_log().
function process_config() {
    global $action;
    while (!parse_config()) {
        $wait_and_retry = file_exists('/usr/bin/hdactl') && $action == 'daemon';
        if (!$wait_and_retry) {
            // Not the Amahi daemon: give up.
            gh_log(CRITICAL, "Config file parsing failed. Exiting.");
            continue;
        }
        sleep(600); // 10 minutes
    }
    // Config is OK; go on!
}
// mb_strpos() variant where '*' in $needle acts as a wildcard.
// Without any '*', this is exactly mb_strpos($haystack, $needle).
// With wildcards, the non-empty parts of the needle (split on '*') must all be found in
// $haystack, in order, each one after the end of the previous match.
// Returns:
//   - 0 when the needle starts with '*' and all parts were found;
//   - otherwise the position of the first part in $haystack, when all parts were found;
//   - FALSE when a part is missing, when the needle contains nothing but '*',
//     or when the needle stripped of its '*' is identical to $haystack.
function gh_wild_mb_strpos($haystack, $needle) {
    $is_wild = (mb_strpos($needle, "*") !== FALSE);
    if (!$is_wild) {
        return mb_strpos($haystack, $needle);
    }
    // Strict comparison: with ==, PHP compares numeric-looking strings as numbers,
    // so e.g. ' 1' and '1' would wrongly be treated as identical.
    if (str_replace('*', '', $needle) === (string) $haystack) {
        return FALSE;
    }
    $needles = explode("*", $needle);
    if ($needle[0] == '*') {
        // Leading wildcard: the match is considered to start at the beginning of the haystack.
        $first_index = 0;
    }
    // Must be initialised: a needle made only of '*' never enters the branch that sets it.
    $found = FALSE;
    foreach ($needles as $needle_part) {
        if ($needle_part === '') {
            continue;
        }
        $needle_index = mb_strpos($haystack, $needle_part);
        if (!isset($first_index)) {
            $first_index = $needle_index;
        }
        if ($needle_index === FALSE) {
            return FALSE;
        }
        $found = TRUE;
        // Continue searching after the part we just matched.
        $haystack = mb_substr($haystack, $needle_index + mb_strlen($needle_part));
    }
    if ($found) {
        return $first_index;
    }
    return FALSE;
}
// Replaces only the first occurrence of $search in $subject with $replace (multibyte-aware).
// Returns $subject untouched when $search is not found.
function str_replace_first($search, $replace, $subject) {
    $position = mb_strpos($subject, $search);
    if ($position === FALSE) {
        return $subject;
    }
    $head = mb_substr($subject, 0, $position);
    $tail = mb_substr($subject, $position + mb_strlen($search));
    return $head . $replace . $tail;
}
// Makes sure the global $metastore_backup_drives lists 2 metastore backup directories.
// - With fewer than 2 storage pool drives, no backup is kept (the list is emptied).
// - An empty list is first loaded from the 'metastore_backup_directory' setting; if that setting
//   doesn't exist and $try_restore is TRUE, Settings::restore() is tried, and on success this
//   function re-runs once with $try_restore = FALSE.
// - Listed directories that aren't on a Greyhole-owned drive anymore are dropped; missing
//   directories on valid drives are re-created.
// - If fewer than 2 directories remain, the first remaining one (if any) is kept, random storage
//   pool drives are picked to complete the list, and the result is saved in the settings.
function set_metastore_backup($try_restore=TRUE) {
    global $metastore_backup_drives, $storage_pool_drives;

    $num_metastore_backups_needed = 2;
    if (!is_array($storage_pool_drives) || count($storage_pool_drives) < 2) {
        $metastore_backup_drives = array();
        return;
    }

    gh_log(DEBUG, "Loading metastore backup directories...");
    if (empty($metastore_backup_drives)) {
        // In the DB ?
        $setting = Settings::get('metastore_backup_directory');
        if ($setting) {
            $metastore_backup_drives = unserialize($setting->value);
            if (!is_array($metastore_backup_drives)) {
                // unserialize() returns FALSE on a damaged value; treat that as "nothing saved"
                // instead of counting/iterating over a non-array below.
                $metastore_backup_drives = array();
            }
            gh_log(DEBUG, " Found " . count($metastore_backup_drives) . " directories in the settings table.");
        } else if ($try_restore) {
            // Try to load a backup from the data drive, if we can find one.
            if (Settings::restore()) {
                set_metastore_backup(FALSE);
                return;
            }
        }
    }
    if (empty($metastore_backup_drives)) {
        // Normalize NULL / unset / FALSE into an empty list, so that count() is always given an array.
        $metastore_backup_drives = array();
    }

    // Verify the drives, if any
    foreach ($metastore_backup_drives as $key => $metastore_backup_drive) {
        if (!is_greyhole_owned_dir(str_replace('/.gh_metastore_backup', '', $metastore_backup_drive))) {
            // Directory is now invalid; stop using it.
            gh_log(DEBUG, "Removing $metastore_backup_drive from available 'metastore_backup_directories' - this directory isn't a greyhole storage pool drive (anymore?)");
            unset($metastore_backup_drives[$key]);
        } else if (!is_dir($metastore_backup_drive)) {
            // Directory is invalid, but needs to be created (was rm'ed?)
            mkdir($metastore_backup_drive);
        }
    }

    if (count($metastore_backup_drives) < $num_metastore_backups_needed) {
        gh_log(DEBUG, " Missing some drives. Need $num_metastore_backups_needed, currently have " . count($metastore_backup_drives) . ". Will select more...");
        // Use a hash (directory => TRUE) so that picking the same directory twice doesn't count twice.
        $metastore_backup_drives_hash = array();
        if (count($metastore_backup_drives) > 0) {
            $metastore_backup_drives_hash[array_shift($metastore_backup_drives)] = TRUE;
        }
        while (count($metastore_backup_drives_hash) < $num_metastore_backups_needed) {
            // Let's pick new one
            $metastore_backup_drive = clean_dir($storage_pool_drives[array_rand($storage_pool_drives)] . '/.gh_metastore_backup');
            $metastore_backup_drives_hash[$metastore_backup_drive] = TRUE;
            if (!is_dir($metastore_backup_drive)) {
                mkdir($metastore_backup_drive);
            }
            gh_log(DEBUG, " Randomly picked $metastore_backup_drive");
        }
        $metastore_backup_drives = array_keys($metastore_backup_drives_hash);

        // Got 2 drives now; save them in the DB
        Settings::set('metastore_backup_directory', $metastore_backup_drives);
    }
}
// Creates, in the trash share (global $trash_share, if defined), a symlink pointing to a file
// that was moved into the trash.
// $filepath_in_trash: path of the file inside the trash directory.
// $trash_path:        trash directory; it is replaced by the trash share's landing zone
//                     to compute where the symlink goes.
// If the symlink name is already taken, " copy N" is appended (N = 1, 2, ...) until a free name
// is found; if one of the existing entries already links to $filepath_in_trash, nothing is done.
// Otherwise, the parent directory is created in the trash share, using the owner and group of the
// file's directory in the trash, and 0777 permissions.
function create_trash_share_symlink($filepath_in_trash, $trash_path) {
    global $trash_share;
    if (!isset($trash_share)) {
        return;
    }

    $filepath_in_trash = clean_dir($filepath_in_trash);
    $filepath_in_trash_share = str_replace($trash_path, $trash_share['landing_zone'], $filepath_in_trash);

    // file_exists() follows symlinks, and thus returns FALSE for a symlink whose target is gone.
    // Such a dangling link still occupies the name, so is_link() is checked too.
    if (file_exists($filepath_in_trash_share) || is_link($filepath_in_trash_share)) {
        $new_filepath = $filepath_in_trash_share;
        $i = 1;
        while (file_exists($new_filepath) || is_link($new_filepath)) {
            if (@readlink($new_filepath) == $filepath_in_trash) {
                // There's already a symlink to that file in the trash share; let's not make a second one!
                return;
            }
            $new_filepath = "$filepath_in_trash_share copy $i";
            $i++;
        }
        $filepath_in_trash_share = $new_filepath;
        list($unused_path, $filename) = explode_full_path($filepath_in_trash_share);
    } else {
        list($original_path, $filename) = explode_full_path($filepath_in_trash);
        list($path, $filename) = explode_full_path($filepath_in_trash_share);

        $dir_infos = (object) array(
            'fileowner' => (int) gh_fileowner($original_path),
            'filegroup' => (int) gh_filegroup($original_path),
            'fileperms' => (int) base_convert("0777", 8, 10)
        );
        gh_mkdir($path, $dir_infos);
    }

    // Only report success when the symlink was really created.
    if (symlink($filepath_in_trash, $filepath_in_trash_share)) {
        gh_log(DEBUG, " Created symlink to deleted file in {$trash_share['name']} share ($filename).");
    } else {
        gh_log(WARN, " Failed to create symlink to deleted file in {$trash_share['name']} share ($filename).");
    }
}
// Parses the command line (through _getopt()) into an action and its options.
// Short and long options are declared in parallel: the Nth letter of a short-options string
// corresponds to the Nth entry of the matching long-options array.
// Returns array($action, $options), where:
//   - $action is the long name of the action found, or 'unknown';
//   - $options maps option names to TRUE (flags) or to the value received. The value given to
//     --debug is stored as 'debug_filename'; the value given to any other action that accepts
//     one is stored as 'dir'.
function process_command_line() {
    // Actions that don't accept a value
    $short_actions = 'DfClsSLaqiU5';
    $long_actions = array(
        'daemon',
        'fsck',
        'cancel-fsck',
        'balance',
        'stats',
        'status',
        'logs',
        'empty-trash',
        'view-queue',
        'iostat',
        'getuid',
        'md5-worker'
    );

    // Actions that accept a value
    $short_valued_actions = 'w::g::n::t::b:';
    $long_valued_actions = array(
        'wait-for::',
        'gone::',
        'going::',
        'thaw::',
        'debug:'
    );

    // Options that don't accept a value
    $short_flags = 'eyucokmj';
    $long_flags = array(
        'email-report',
        'dont-walk-metadata-store',
        'disk-usage-report',
        'if-conf-changed',
        'find-orphaned-files',
        'checksums',
        'delete-orphaned-metadata',
        'json'
    );

    // Options that accept a value
    $short_valued_flags = 'd:';
    $long_valued_flags = array(
        'dir:'
    );

    $parsed = _getopt(
        $short_actions . $short_valued_actions . $short_flags . $short_valued_flags,
        array_merge($long_actions, $long_valued_actions, $long_flags, $long_valued_flags)
    );

    // Transform short options to long options: copy each short option received under its long name.
    $short_to_long = array(
        array($short_actions, $long_actions),
        array(str_replace(':', '', $short_valued_actions), $long_valued_actions),
        array($short_flags, $long_flags),
        array(str_replace(':', '', $short_valued_flags), $long_valued_flags)
    );
    foreach ($short_to_long as $pair) {
        list($letters, $long_names) = $pair;
        $num_letters = mb_strlen($letters);
        for ($i = 0; $i < $num_letters; $i++) {
            $letter = $letters[$i];
            if (isset($parsed[$letter])) {
                // Long names of valued options carry ':' markers; strip them to get the key.
                $parsed[str_replace(':', '', $long_names[$i])] = $parsed[$letter];
            }
        }
    }

    $action = 'unknown';
    $options = array();

    // Find the action: first a value-less one...
    foreach ($long_actions as $name) {
        if (isset($parsed[$name])) {
            $action = $name;
            break;
        }
    }
    // ... then one that accepts a value; if found, it replaces the one found above.
    foreach ($long_valued_actions as $spec) {
        $name = str_replace(':', '', $spec);
        if (!isset($parsed[$name])) {
            continue;
        }
        $action = $name;
        if ($action == 'debug') {
            $options['debug_filename'] = $parsed[$name];
        } else if ($parsed[$name] !== FALSE) {
            $options['dir'] = $parsed[$name];
        }
        break;
    }

    // Find the options
    foreach ($long_flags as $name) {
        if (isset($parsed[$name])) {
            $options[$name] = TRUE;
        }
    }
    foreach ($long_valued_flags as $spec) {
        $name = str_replace(':', '', $spec);
        if (isset($parsed[$name])) {
            $options[$name] = $parsed[$name];
        }
    }

    return array($action, $options);
}
// Prints the command line help (version, license notice, available actions and options),
// then terminates the script with exit code 1.
function print_usage() {
    // One entry per line of output; each entry carries its own line ending.
    $usage_lines = array(
        "greyhole, version 0.9.16, for linux-gnu (noarch)\n",
        "This software comes with ABSOLUTELY NO WARRANTY. This is free software,\n",
        "and you are welcome to modify and redistribute it under the GPL v3 license.\n",
        "\n",
        "Usage: greyhole [ACTION] [OPTIONS]\n",
        "\n",
        "Where ACTION is one of:\n",
        " -?, --help Display this help and exit.\n",
        " -D, --daemon Start the daemon.\n",
        " -f, --fsck Schedule a fsck.\n",
        " -C, --cancel-fsck Cancel all scheduled fsck.\n",
        " -l, --balance Balance available space on storage pool drives.\n",
        " -s, --stats Display storage pool statistics.\n",
        " -i, --iostat I/O statistices for your storage pool drives.\n",
        " -L, --logs Display new greyhole.log entries as they are logged.\n",
        " -S, --status Display what the Greyhole daemon is currently doing.\n",
        " -q, --view-queue Display the current work queue.\n",
        " -a, --empty-trash Empty the trash.\n",
        " -b, --debug=filename Debug past file operations.\n",
        " -t, --thaw[=path] Thaw a frozen directory. Greyhole will start working on\n",
        " files inside <path>. If you don't supply an option, the list\n",
        " of frozen directories will be displayed.\n",
        " -w, --wait-for[=path] Tell Greyhole that the missing drive at <path> will return\n",
        " soon, and that it shouldn't re-create additional file copies\n",
        " to replace it. If you don't supply an option, the available\n",
        " options (paths) will be displayed.\n",
        " -g, --gone[=path] Tell Greyhole that the missing drive at <path> is gone for\n",
        " good. Greyhole will start replacing the missing file copies\n",
        " instantly. If you don't supply an option, the available\n",
        " options (paths) will be displayed.\n",
        " -n, --going[=path] Tell Greyhole that you want to remove a drive. Greyhole will\n",
        " then make sure you don't loose any files, and that the\n",
        " correct number of file copies are created to replace the\n",
        " missing drive. If you don't supply an option, the available\n",
        " options (paths) will be displayed.\n",
        "\n",
        "For --stats and --view-queue, the available OPTIONS are:\n",
        " -j, --json Output the result as JSON, instead of human-readable text.\n",
        "\n",
        "For --fsck, the available OPTIONS are:\n",
        " -e, --email-report Send an email when fsck completes, to report on what was\n",
        " checked, and any error that was found.\n",
        " -y, --dont-walk-metadata-store\n",
        " Speed up fsck by skipping the scan of the metadata store\n",
        " directories. Scanning the metadata stores is only required to\n",
        " re-create symbolic links that might be missing from your\n",
        " shared directories.\n",
        " -c, --if-conf-changed Only fsck if greyhole.conf or smb.conf paths changed since\n",
        " the last fsck.\n",
        " Used in the daily cron to prevent unneccesary fsck runs.\n",
        " -d, --dir=path Only scan a specific directory, and all sub-directories.\n",
        " The specified directory should be a Samba share, a\n",
        " sub-directory of a Samba share, or any directory on a \n",
        " storage pool drive.\n",
        " -o, --find-orphaned-files\n",
        " Scan for files with no metadata in the storage pool drives.\n",
        " This will allow you to include existing files on a drive\n",
        " in your storage pool without having to copy them manually.\n",
        " -k, --checksums Read ALL files in your storage pool, and check that\n",
        " file copies are identical. This will identify any problem\n",
        " you might have with your file-systems.\n",
        " NOTE: this can take a LONG time to complete, since it will read\n",
        " everything from all your drives!\n",
        " -m, --delete-orphaned-metadata\n",
        " When fsck find metadata files with no file copies, delete those\n",
        " metadata files. If the file copies re-appear later, you'll need\n",
        " to run fsck with --find-orphaned-files to have them reappear in\n",
        " your shares.\n"
    );
    echo implode('', $usage_lines);
    exit(1);
}
// Computes a fingerprint of the configuration lines that matter to fsck:
//   - from the Greyhole config file ($config_file): the non-commented lines containing num_copies,
//     storage_pool_directory, storage_pool_drive or sticky_files;
//   - from the Samba config file ($smb_config_file): the non-commented lines containing
//     'path' or 'vfs objects'.
// Returns the MD5 of those lines, joined with newlines (Greyhole lines first).
function get_conf_md5() {
    global $config_file, $smb_config_file;
    $greyhole_grep = "grep -ie 'num_copies\|storage_pool_directory\|storage_pool_drive\|sticky_files' " . escapeshellarg($config_file) . " | grep -v '^#'";
    $samba_grep = "grep -ie 'path\|vfs objects' " . escapeshellarg($smb_config_file) . " | grep -v '^#'";
    // exec() appends to an existing output array, so both commands accumulate into the same list.
    $relevant_lines = array();
    exec($greyhole_grep, $relevant_lines);
    exec($samba_grep, $relevant_lines);
    return md5(implode("\n", $relevant_lines));
}
// Formats the time elapsed since $past_time (a UNIX timestamp) as e.g. "1h 2m 5s ago".
// Hours are shown only when at least one hour elapsed; minutes only when at least one minute elapsed.
// Returns 'just now' when nothing but "0s" would be displayed.
function how_long_ago($past_time) {
    $seconds = time() - $past_time;
    $minutes = floor($seconds / 60);
    $elapsed = '';
    if ($minutes > 0) {
        // Keep only the seconds that don't make up a full minute.
        $seconds -= $minutes * 60;
        $hours = floor($minutes / 60);
        if ($hours > 0) {
            $elapsed .= $hours . "h ";
            $minutes -= $hours * 60;
        }
        $elapsed .= $minutes . "m ";
    }
    $elapsed .= $seconds . "s";
    return $elapsed == '0s' ? 'just now' : "$elapsed ago";
}
// Returns the unique identifier used when Greyhole calls home, to uniquely identify this Greyhole instance.
// The identifier is read from the first .greyhole_uses_this file, found at the root of a storage
// pool drive, that is exactly 23 bytes long. When there is no such file, a new identifier is
// generated with uniqid(); it is not saved here.
function get_uniq_id() {
    global $storage_pool_drives;
    foreach ($storage_pool_drives as $sp_drive) {
        $marker_file = "$sp_drive/.greyhole_uses_this";
        if (!file_exists($marker_file) || filesize($marker_file) != 23) {
            continue;
        }
        // Found a valid uid
        return file_get_contents($marker_file);
    }
    // No uid found; generate a new one.
    return uniqid('', TRUE);
}
// Writes the unique identifier (existing, or newly generated by get_uniq_id()) into the
// .greyhole_uses_this file of every storage pool drive that has one. Missing files are not created.
// Returns the identifier.
function set_uniq_id() {
    global $storage_pool_drives;
    $uniq_id = get_uniq_id();
    foreach ($storage_pool_drives as $sp_drive) {
        $marker_file = "$sp_drive/.greyhole_uses_this";
        if (!file_exists($marker_file)) {
            continue;
        }
        file_put_contents($marker_file, $uniq_id);
    }
    return $uniq_id;
}
// Builds the shell command that copies $source to $destination: "rsync -t" when the global
// $copy_method is 'rsync', "cp" otherwise. Both paths are shell-escaped.
function get_copy_cmd($source, $destination) {
    global $copy_method;
    $program = (@$copy_method == 'rsync') ? "rsync -t " : "cp ";
    return $program . escapeshellarg($source) . " " . escapeshellarg($destination);
}
// Tells whether $option appears anywhere in the additional_info property of $task.
function task_has_option($task, $option) {
    $position = mb_strpos($task->additional_info, $option);
    return $position !== FALSE;
}
// Starts '/usr/bin/greyhole --$action --dir=...' in the background (one --dir per entry of
// $arguments), unless the process list already shows such a process.
// Returns 1 when a matching process is already running; otherwise, the content of
// /var/run/greyhole_m5d_worker.pid (where the output of the new process is redirected),
// cast to an integer, read 1/10s after launching.
function spawn_thread($action, $arguments) {
    // Don't spawn duplicate threads
    // Every pattern is matched literally (grep -F) and shell-escaped, so that paths containing
    // spaces, quotes, '$' or regular expression characters can neither break nor alter the command.
    $dir_patterns = array();
    foreach ($arguments as $argument) {
        $dir_patterns[] = escapeshellarg("dir=$argument");
    }
    if (empty($dir_patterns)) {
        $dir_patterns[] = escapeshellarg('dir=');
    }
    $ps_cmd = 'ps ax | grep -F ' . escapeshellarg("/usr/bin/greyhole --$action")
        . ' | grep -F ' . implode(' | grep -F ', $dir_patterns)
        . ' | grep -v grep | wc -l';
    $num_worker_thread = (int) exec($ps_cmd);
    if ($num_worker_thread > 0) {
        gh_log(DEBUG, "Won't span a duplicate thread; 'greyhole --$action --dir=$arguments[0]' is already running");
        return 1;
    }

    $cmd = "/usr/bin/greyhole --$action --dir=" . implode(' --dir=', array_map('escapeshellarg', $arguments));
    exec("$cmd 1>/var/run/greyhole_m5d_worker.pid 2>&1 &");
    usleep(100000); // 1/10s
    return (int) file_get_contents('/var/run/greyhole_m5d_worker.pid');
}
// Main loop of an MD5 worker: processes the pending 'md5' tasks whose additional_info starts
// with one of the paths in $drives.
// Tasks are fetched 10 at a time, oldest first. For each one, the MD5 of the file named in
// additional_info is calculated, then the task is marked complete, with "=<md5>" appended to
// its additional_info.
// When no md5 task is pending, the worker exits unless the next task returned by get_next_task()
// is a 'fsck' or 'fsck_file' task; in that case, it checks again 5 seconds later.
function md5_worker_thread($drives) {
    $like_clauses = array();
    foreach ($drives as $dir) {
        $like_clauses[] = sprintf("additional_info LIKE '%s%%'", db_escape_string($dir));
    }
    $dirs_clause = implode(' OR ', $like_clauses);
    $query = "SELECT id, share, full_path, additional_info FROM tasks WHERE action = 'md5' AND complete = 'no' AND ($dirs_clause) ORDER BY id ASC LIMIT 10";

    $result_new_tasks = null;
    for (;;) {
        // First, try to take the next row of the current batch, if there is one.
        $task = FALSE;
        if (!empty($result_new_tasks)) {
            $task = db_fetch_object($result_new_tasks);
            if ($task === FALSE) {
                // Current batch is exhausted.
                db_free_result($result_new_tasks);
                $result_new_tasks = null;
            }
        }

        // No row available: ask the database for a new batch.
        if ($task === FALSE) {
            if (!($result_new_tasks = db_query($query))) {
                gh_log(CRITICAL, "Can't query md5 tasks: " . db_error() . "$query");
            }
            $task = db_fetch_object($result_new_tasks);
        }

        if ($task === FALSE) {
            // Nothing new to process
            // Stop this thread once we have nothing more to do, and fsck completed.
            $task = get_next_task($tmp_rs);
            $fsck_is_running = ($task !== FALSE && ($task->action == 'fsck' || $task->action == 'fsck_file'));
            if (!$fsck_is_running) {
                gh_log(DEBUG, "MD5 worker thread for " . implode(', ', $drives) . " will now exit; it has nothing more to do.");
                break;
            }
            sleep(5);
            continue;
        }

        gh_log(INFO, "Working on MD5 task ID $task->id: $task->additional_info");
        $md5 = md5_file($task->additional_info);
        gh_log(DEBUG, " MD5 for $task->additional_info = $md5");

        $update_query = sprintf("UPDATE tasks SET complete = 'yes', additional_info = '%s' WHERE id = $task->id", db_escape_string("$task->additional_info=$md5"));
        if (!db_query($update_query)) {
            gh_log(CRITICAL, "Can't update md5 task: " . db_error());
        }
    }
}
function gh_check_md5($task) { | |
global $shares_options; | |
$share_options = $shares_options[$task->share]; | |
$query = sprintf("SELECT complete, COUNT(*) AS num, GROUP_CONCAT(id) AS ids FROM tasks WHERE action = 'md5' AND share = '%s' AND full_path = '%s' GROUP BY complete ORDER BY complete ASC", | |
db_escape_string($task->share), | |
db_escape_string($task->full_path) | |
); | |
$result = db_query($query) or gh_log(CRITICAL, "Can't query complete md5 tasks: " . db_error()); | |
$complete_tasks = db_fetch_object($result); // ORDER BY complete ASC in the above query will always return complete='yes' first, and there will always be at least one ($task) | |
$incomplete_tasks = db_fetch_object($result); | |
if ($incomplete_tasks === FALSE) { | |
$incomplete_tasks = array(); | |
} | |
if (count($incomplete_tasks) > 0) { | |
// We don't have all of them yet. Let's post-pone this until we do. | |
$query = sprintf("INSERT INTO tasks (action, share, full_path, additional_info, complete) SELECT action, share, full_path, additional_info, complete FROM tasks WHERE id = %d", $task->id); | |
db_query($query) or gh_log(CRITICAL, "Can't postpone md5 task: " . db_error()); | |
// If there's no worker thread alive, spawn all of them. The idle ones will just die. | |
$num_worker_threads = (int) trim(exec("ps x | grep '/usr/bin/greyhole --md5-worker' | grep -v grep | wc -l")); | |
if ($num_worker_threads == 0) { | |
gh_log(DEBUG, " Will spawn new worker threads to work on this."); | |
global $storage_pool_drives; | |
foreach ($storage_pool_drives as $sp_drive) { | |
spawn_thread('md5-worker', array($sp_drive)); | |
} | |
} else { | |
// Give the worker thread some time to catch up | |
gh_log(DEBUG, " Will wait some to allow for MD5 worker threads to complete."); | |
sleep(5); | |
} | |
return; | |
} | |
// We have all of them; let's check the MD5 checksums | |
gh_log(DEBUG, "Checking MD5 checksums for " . clean_dir("$task->share/$task->full_path")); | |
$result_tasks = db_query("SELECT * FROM tasks WHERE id IN ($complete_tasks->ids)") or gh_log(CRITICAL, "Can't fetch complete md5 tasks: " . db_error()); | |
$md5s = array(); | |
while ($t = db_fetch_object($result_tasks)) { | |
if (preg_match('/^(.+)=([0-9a-f]{32})$/', $t->additional_info, $regs)) { | |
$md5s[$regs[2]][] = clean_dir($regs[1]); | |
} else { | |
$md5s['unreadable files'][] = clean_dir($t->additional_info); | |
} | |
} | |
if (count($md5s) == 1) { | |
$md5s = array_keys($md5s); | |
$md5 = reset($md5s); | |
if ($md5 == 'unreadable files') { | |
// Oopsy! | |
$logs = array( | |
" The following file is unreadable: " . clean_dir($t->additional_info), | |
" The underlying filesystem probably contains errors. You should unmount that partition, and check it using e2fsck -cfp" | |
); | |
} else { | |
gh_log(DEBUG, " All copies have the same MD5 checksum: $md5"); | |
} | |
} | |
else if (count($md5s) > 1) { | |
// Oopsy! | |
$logs = array("Mismatch in file copies checksums:"); | |
foreach ($md5s as $md5 => $file_copies) { | |
$files = ''; | |
foreach ($file_copies as $file_copy) { | |
if ($files != '') { | |
$files .= ', '; | |
} | |
$files .= $file_copy; | |
} | |
// Automatically fix this if: | |
// - there's only 2 different MD5s for all file copies (i.e. one for all other files copies, and one for this file copy) | |
// - the current MD5 is only for one file copy (we assume this copy is in error, not the others) | |
// - that file copy isn't used as the share symlink target | |
$latest_file_copy = $file_copy; | |
$original_file_path = clean_dir(readlink(get_share_landing_zone($task->share) . "/" . $task->full_path)); | |
if (count($md5s) == 2 && count($file_copies) == 1 && $latest_file_copy != $original_file_path) { | |
$original_md5 = 'Unknown'; | |
foreach ($md5s as $this_md5 => $file_copies) { | |
foreach ($file_copies as $file_copy) { | |
if ($file_copy == $original_file_path) { | |
$original_md5 = $this_md5; | |
break; | |
} | |
} | |
} | |
gh_log(WARN, " A file copy with a different checksum than the original was found: $latest_file_copy = $md5. Original: $original_file_path = $original_md5"); | |
gh_log(WARN, " This copy will be deleted, and replaced with a new copy from $original_file_path"); | |
gh_recycle($latest_file_copy); | |
$metafiles = array(); | |
list($path, $filename) = explode_full_path($task->full_path); | |
foreach (get_metafiles($task->share, $path, $filename, TRUE, TRUE, FALSE) as $existing_metafiles) { | |
foreach ($existing_metafiles as $key => $metafile) { | |
$metafiles[$key] = $metafile; | |
} | |
} | |
create_copies_from_metafiles($metafiles, $task->share, $task->full_path, $original_file_path, TRUE); | |
gh_log(DEBUG, " Calculating MD5 for new file copy at $latest_file_copy ..."); | |
$md5 = md5_file($latest_file_copy); | |
gh_log(DEBUG, " MD5 = $md5"); | |
if ($md5 == $original_md5) { | |
gh_log(DEBUG, " All copies have the same MD5 checksum: $md5"); | |
delete_tasks($complete_tasks->ids); | |
return; | |
} | |
} | |
$logs[] = " [$md5] => $files"; | |
if (count($file_copies) == 1) { | |
$logs[] = " The above file is unreadable."; | |
$logs[] = " The underlying filesystem probably contains errors. You should unmount that partition, and check it using e2fsck -cfp"; | |
} else { | |
$logs[] = " The above files are unreadable."; | |
$logs[] = " The underlying filesystems probably contains errors. You should unmount those partitions, and check them using e2fsck -cfp"; | |
} | |
} | |
$logs[] = "You should manually check which file copy is invalid, and delete it. Re-create a valid copy with:"; | |
$logs[] = " greyhole --fsck --dir " . escapeshellarg(dirname(clean_dir($share_options['landing_zone'] . "/$task->full_path"))); | |
} | |
if (isset($logs)) { | |
// Write to greyhole.log | |
foreach ($logs as $log) { | |
gh_log(ERROR, $log); | |
} | |
// Write in fsck_checksums.log too | |
$flog = fopen(FSCKLogFile::PATH . '/fsck_checksums.log', 'a'); | |
if (!$flog) { | |
gh_log(CRITICAL, "Couldn't open log file: " . FSCKLogFile::PATH . "/fsck_checksums.log"); | |
} | |
fwrite($flog, $date = date("M d H:i:s") . ' ' . implode("\n", $logs) . "\n\n"); | |
fclose($flog); | |
unset($logs); | |
} | |
delete_tasks($complete_tasks->ids); | |
} | |
/**
 * Deletes the given tasks from the `tasks` table, then releases and resets
 * the global $result_new_tasks result set.
 *
 * @param string $task_ids List of task IDs, interpolated as-is into the SQL
 *                         IN (...) clause (expected to be comma-separated
 *                         IDs built by the caller -- not escaped here).
 */
function delete_tasks($task_ids) {
    db_query("DELETE FROM tasks WHERE id IN ($task_ids)") or gh_log(CRITICAL, "Can't delete tasks: " . db_error());

    global $result_new_tasks;
    // Only free a result set that is actually held: this function resets the
    // global to null below, so a later call made before the result set is
    // re-populated would otherwise hand null to db_free_result().
    if ($result_new_tasks) {
        db_free_result($result_new_tasks);
    }
    // NOTE(review): presumably a null value makes the task loop re-query the
    // tasks table -- confirm against the code that reads $result_new_tasks.
    $result_new_tasks = null;
}
/**
 * Populates the global $options array with the fsck settings carried by a task.
 *
 * The task's raw additional_info value is stored under 'all-fsck-options';
 * each individual setting is then filled with the result of
 * task_has_option() for the corresponding flag name.
 *
 * @param object $task Task whose additional_info holds the fsck options.
 */
function set_fsck_options($task) {
    global $options;

    // Keep the unparsed option value exactly as received with the task.
    $options['all-fsck-options'] = $task->additional_info;

    // $options key => flag name looked up on the task.
    $flag_names = array(
        'disk-usage-report'     => 'du',
        'find-orphans'          => 'orphaned',
        'verify-checksums'      => 'checksums',
        'del-orphaned-metadata' => 'del-orphaned-metadata',
    );
    foreach ($flag_names as $option_key => $flag_name) {
        $options[$option_key] = task_has_option($task, $flag_name);
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment