Sync from server repo (23fe7654cb3)
Matt Spilchen committed Feb 12, 2024
1 parent 9b60ae5 commit 8488877
Showing 17 changed files with 656 additions and 86 deletions.
11 changes: 11 additions & 0 deletions commands/cmd_show_restore_points.go
@@ -45,6 +45,17 @@ func makeCmdShowRestorePoints() *CmdShowRestorePoints {
showRestorePointsOptions.ConfigDirectory = newCmd.parser.String("config-directory", "",
util.GetOptionalFlagMsg("Directory where "+vclusterops.ConfigFileName+" is located"))

showRestorePointsOptions.FilterOptions.ArchiveName = newCmd.parser.String("restore-point-archive", "",
util.GetOptionalFlagMsg("Archive name to filter restore points with"))
showRestorePointsOptions.FilterOptions.ArchiveID = newCmd.parser.String("restore-point-id", "",
util.GetOptionalFlagMsg("ID to filter restore points with"))
showRestorePointsOptions.FilterOptions.ArchiveIndex = newCmd.parser.String("restore-point-index", "",
util.GetOptionalFlagMsg("Index to filter restore points with"))
showRestorePointsOptions.FilterOptions.StartTimestamp = newCmd.parser.String("start-timestamp", "",
util.GetOptionalFlagMsg("Only show restores points created no earlier than this"))
showRestorePointsOptions.FilterOptions.EndTimestamp = newCmd.parser.String("end-timestamp", "",
util.GetOptionalFlagMsg("Only show restores points created no later than this"))

newCmd.showRestorePointsOptions = &showRestorePointsOptions

return newCmd
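As a side note for readers of this hunk: the five filters above follow the same optional-string pattern as the existing flags. Below is a minimal, self-contained sketch of that pattern using only Go's standard flag package; the filterOptions type, the show_restore_points flag-set name, and the sample values are hypothetical stand-ins, not the vclusterops API.

package main

import (
	"flag"
	"fmt"
)

// filterOptions is a hypothetical stand-in mirroring the filter fields
// registered above (archive name, ID, index, and a timestamp window).
type filterOptions struct {
	ArchiveName    *string
	ArchiveID      *string
	ArchiveIndex   *string
	StartTimestamp *string
	EndTimestamp   *string
}

func main() {
	parser := flag.NewFlagSet("show_restore_points", flag.ExitOnError)

	// Register each filter as an optional string flag, defaulting to "".
	opts := filterOptions{
		ArchiveName:    parser.String("restore-point-archive", "", "Archive name to filter restore points with"),
		ArchiveID:      parser.String("restore-point-id", "", "ID to filter restore points with"),
		ArchiveIndex:   parser.String("restore-point-index", "", "Index to filter restore points with"),
		StartTimestamp: parser.String("start-timestamp", "", "Only show restore points created no earlier than this"),
		EndTimestamp:   parser.String("end-timestamp", "", "Only show restore points created no later than this"),
	}

	// Example invocation: filter by archive name and a start timestamp.
	_ = parser.Parse([]string{"--restore-point-archive", "db1_archive", "--start-timestamp", "2024-02-01 00:00:00"})
	fmt.Printf("archive=%q start=%q\n", *opts.ArchiveName, *opts.StartTimestamp)
}

Because FlagSet.String returns a *string that defaults to the empty string, an unset filter and an explicitly empty one look identical, which appears to be how the options above signal "no filter".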
20 changes: 17 additions & 3 deletions commands/cmd_stop_db.go
@@ -17,6 +17,7 @@ package commands

import (
"flag"
"fmt"
"strconv"

"github.com/vertica/vcluster/vclusterops"
@@ -58,6 +59,10 @@ func makeCmdStopDB() *CmdStopDB {
util.GetOptionalFlagMsg("Forcefully use the user's input instead of reading the options from "+vclusterops.ConfigFileName))
stopDBOptions.ConfigDirectory = newCmd.parser.String("config-directory", "",
util.GetOptionalFlagMsg("Directory where "+vclusterops.ConfigFileName+" is located"))
stopDBOptions.Sandbox = newCmd.parser.String("sandbox", "",
util.GetOptionalFlagMsg("Name of the sandbox where Database has to be stopped"))
stopDBOptions.MainCluster = newCmd.parser.Bool("main-cluster-only", false, util.GetOptionalFlagMsg("stop db only on the main cluster"+
" Use it when there are sandboxes involved "))

// Eon flags
newCmd.isEon = newCmd.parser.Bool("eon-mode", false, util.GetEonFlagMsg("indicate if the database is an Eon db."+
@@ -110,7 +115,6 @@ func (c *CmdStopDB) Parse(inputArgv []string, logger vlog.Printer) error {
if !util.IsOptionSet(c.parser, "config-directory") {
c.stopDBOptions.ConfigDirectory = nil
}

return c.validateParse(logger)
}

@@ -142,7 +146,17 @@ func (c *CmdStopDB) Run(vcc vclusterops.VClusterCommands) error {
vcc.Log.Error(err, "failed to stop the database")
return err
}

vcc.Log.PrintInfo("Stopped a database with name %s", *options.DBName)
msg := fmt.Sprintf("Stopped a database with name %s", *options.DBName)
if *options.Sandbox != "" {
sandboxMsg := fmt.Sprintf(" on sandbox %s", *options.Sandbox)
vcc.Log.PrintInfo(msg + sandboxMsg)
return nil
}
if *options.MainCluster {
stopMsg := " on main cluster"
vcc.Log.PrintInfo(msg + stopMsg)
return nil
}
vcc.Log.PrintInfo(msg)
return nil
}
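A hypothetical restatement of the message selection added to Run above as a pure function, so the three branches (sandbox, main-cluster-only, whole database) can be exercised directly; the function name and sample values are illustrative, not part of the commit.

package main

import "fmt"

// stopMessage mirrors the branching above: prefer the sandbox suffix,
// then the main-cluster suffix, otherwise report a plain database stop.
func stopMessage(dbName, sandbox string, mainClusterOnly bool) string {
	msg := fmt.Sprintf("Stopped a database with name %s", dbName)
	if sandbox != "" {
		return msg + fmt.Sprintf(" on sandbox %s", sandbox)
	}
	if mainClusterOnly {
		return msg + " on main cluster"
	}
	return msg
}

func main() {
	fmt.Println(stopMessage("verticadb", "sand1", false)) // sandbox branch
	fmt.Println(stopMessage("verticadb", "", true))       // main-cluster branch
	fmt.Println(stopMessage("verticadb", "", false))      // default branch
}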
2 changes: 1 addition & 1 deletion vclusterops/cluster_op_engine_context.go
@@ -27,7 +27,7 @@ type opEngineExecContext struct {
// This field is specifically used for sandboxing
// as sandboxing requires all nodes in the subcluster to be sandboxed to be UP.
upScInfo map[string]string // map with UP hosts as keys and their subcluster names as values.
sandboxingHosts []string // List of hosts that should run sandboxing command
upHostsToSandboxes map[string]string // map with UP hosts as keys and their corresponding sandbox names as values.
defaultSCName string // store the default subcluster name of the database
hostsWithLatestCatalog []string
primaryHostsWithLatestCatalog []string
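A self-contained sketch of the data-structure change above: replacing the sandboxingHosts slice with a map keyed by UP host lets one lookup answer both which hosts are usable and which sandbox each belongs to (the empty string standing for the main cluster). The sample addresses and the initiator-picking loop are invented for illustration.

package main

import "fmt"

func main() {
	// Hypothetical execution-context state: UP hosts mapped to the sandbox
	// they belong to; the empty string marks hosts in the main cluster.
	upHostsToSandboxes := map[string]string{
		"10.0.0.1": "",      // main cluster
		"10.0.0.2": "sand1", // sandboxed host
	}

	// Pick one UP initiator per sandbox (including the main cluster), which
	// is the shape of lookup the sandboxing ops above appear to need.
	initiators := make(map[string]string) // sandbox name -> chosen host
	for host, sb := range upHostsToSandboxes {
		if _, picked := initiators[sb]; !picked {
			initiators[sb] = host
		}
	}
	fmt.Println(initiators)
}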
107 changes: 100 additions & 7 deletions vclusterops/https_check_db_running_op.go
@@ -34,6 +34,8 @@ const (
StopDB
StartDB
ReviveDB

opName = "HTTPSCheckDBRunningOp"
)

func (op opType) String() string {
@@ -66,19 +68,42 @@ func (e *DBIsRunningError) Error() string {
type httpsCheckRunningDBOp struct {
opBase
opHTTPSBase
opType opType
opType opType
sandbox string // check if DB is running on specified sandbox
mainCluster bool // check if DB is running on the main cluster.
}

func makeHTTPSCheckRunningDBOp(logger vlog.Printer, hosts []string,
useHTTPPassword bool, userName string,
httpsPassword *string, operationType opType,
) (httpsCheckRunningDBOp, error) {
op := httpsCheckRunningDBOp{}
op.name = "HTTPSCheckDBRunningOp"
op.name = opName
op.logger = logger.WithName(op.name)
op.hosts = hosts
op.useHTTPPassword = useHTTPPassword
err := util.ValidateUsernameAndPassword(op.name, useHTTPPassword, userName)
if err != nil {
return op, err
}

op.userName = userName
op.httpsPassword = httpsPassword
op.opType = operationType
return op, nil
}

func makeHTTPSCheckRunningDBWithSandboxOp(logger vlog.Printer, hosts []string,
useHTTPPassword bool, userName string, sandbox string, mainCluster bool,
httpsPassword *string, operationType opType,
) (httpsCheckRunningDBOp, error) {
op := httpsCheckRunningDBOp{}
op.name = opName
op.logger = logger.WithName(op.name)
op.hosts = hosts
op.useHTTPPassword = useHTTPPassword
op.sandbox = sandbox // check if DB is running on specified sandbox
op.mainCluster = mainCluster // check if DB is running on the main cluster
err := util.ValidateUsernameAndPassword(op.name, useHTTPPassword, userName)
if err != nil {
return op, err
@@ -198,6 +223,25 @@ func (op *httpsCheckRunningDBOp) isDBRunningOnHost(host string,
return status, msg, nil
}

func (op *httpsCheckRunningDBOp) accumulateSandboxedAndMainHosts(sandboxingHosts map[string]string,
mainClusterHosts map[string]struct{}, nodesState *nodesStateInfo) {
if op.sandbox == "" || !op.mainCluster {
return
}

nodeList := nodesState.NodeList
if len(nodeList) > 0 {
for _, node := range nodeList {
if node.Sandbox == op.sandbox && op.sandbox != "" {
sandboxingHosts[node.Address] = node.State
}
if op.mainCluster && node.Sandbox == "" {
mainClusterHosts[node.Address] = struct{}{}
}
}
}
}
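A runnable, stand-alone sketch of the accumulation above; nodeInfo and its fields are hypothetical stand-ins for the parsed node-list entries (address, state, sandbox), and the guard assumes the split is only needed when a sandbox or the main cluster was explicitly requested.

package main

import "fmt"

// nodeInfo is a hypothetical stand-in for one entry of the parsed node list
// (address, state, and the sandbox the node belongs to).
type nodeInfo struct {
	Address string
	State   string
	Sandbox string
}

// accumulate splits nodes into hosts of the requested sandbox and hosts of
// the main cluster, mirroring the helper above.
func accumulate(sandbox string, mainCluster bool, nodes []nodeInfo) (map[string]string, map[string]struct{}) {
	sandboxedHosts := make(map[string]string)     // address -> node state
	mainClusterHosts := make(map[string]struct{}) // set of main-cluster addresses
	if sandbox == "" && !mainCluster {
		return sandboxedHosts, mainClusterHosts // nothing was requested
	}
	for _, node := range nodes {
		if sandbox != "" && node.Sandbox == sandbox {
			sandboxedHosts[node.Address] = node.State
		}
		if mainCluster && node.Sandbox == "" {
			mainClusterHosts[node.Address] = struct{}{}
		}
	}
	return sandboxedHosts, mainClusterHosts
}

func main() {
	nodes := []nodeInfo{
		{Address: "10.0.0.1", State: "UP", Sandbox: ""},
		{Address: "10.0.0.2", State: "UP", Sandbox: "sand1"},
		{Address: "10.0.0.3", State: "DOWN", Sandbox: "sand1"},
	}
	sb, main := accumulate("sand1", true, nodes)
	fmt.Println(sb, main)
}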

// processResult will look at all of the results that come back from the hosts.
// We don't return an error if all of the nodes are down. Otherwise, an error is
// returned.
@@ -209,6 +253,8 @@ func (op *httpsCheckRunningDBOp) processResult(_ *opEngineExecContext) error {
upHosts := make(map[string]bool)
downHosts := make(map[string]bool)
exceptionHosts := make(map[string]bool)
sandboxedHosts := make(map[string]string)
mainClusterHosts := make(map[string]struct{})
// print msg
msg := ""
for host, result := range op.clusterHTTPRequest.ResultCollection {
@@ -226,6 +272,7 @@
}

upHosts[host] = true

// a passing result means that the db isn't down
nodesStates := nodesStateInfo{}
err := op.parseAndCheckResponse(host, result.content, &nodesStates)
@@ -237,6 +284,9 @@
msg = result.content
continue
}

op.accumulateSandboxedAndMainHosts(sandboxedHosts, mainClusterHosts, &nodesStates)

status, checkMsg, err := op.isDBRunningOnHost(host, &nodesStates, result)
if err != nil {
return fmt.Errorf("[%s] error happened during checking DB running on host %s, details: %w",
@@ -247,13 +297,16 @@
msg = checkMsg
}

return op.handleDBRunning(allErrs, msg, upHosts, downHosts, exceptionHosts)
return op.handleDBRunning(allErrs, msg, upHosts, downHosts, exceptionHosts, sandboxedHosts, mainClusterHosts)
}

func (op *httpsCheckRunningDBOp) handleDBRunning(allErrs error, msg string, upHosts, downHosts, exceptionHosts map[string]bool) error {
op.logger.Info("check db running results", "up hosts", upHosts, "down hosts", downHosts, "hosts with status unknown", exceptionHosts)
// no DB is running on hosts, return a passed result
if len(upHosts) == 0 {
func (op *httpsCheckRunningDBOp) handleDBRunning(allErrs error, msg string, upHosts, downHosts, exceptionHosts map[string]bool,
sandboxedHosts map[string]string, mainClusterHosts map[string]struct{}) error {
op.logger.Info("check db running results", "up hosts", upHosts, "down hosts", downHosts, "hosts with status unknown", exceptionHosts,
"sandboxed hosts", sandboxedHosts)

dbDown := op.checkProcessedResult(sandboxedHosts, mainClusterHosts, upHosts)
if dbDown {
return nil
}
op.logger.Info("Check DB running", "detail", msg)
@@ -273,6 +326,46 @@ func (op *httpsCheckRunningDBOp) handleDBRunning(allErrs error, msg string, upHo
return errors.Join(allErrs, &DBIsRunningError{Detail: msg})
}

func (op *httpsCheckRunningDBOp) checkProcessedResult(sandboxedHosts map[string]string,
mainClusterHosts map[string]struct{}, upHosts map[string]bool) bool {
// no DB is running on hosts, return a passed result
if len(upHosts) == 0 {
if op.sandbox != "" || op.mainCluster {
op.logger.PrintWarning("All the nodes in the database are down")
}
return true
}

// Check if any of the sandboxed hosts is UP
// sandboxedHosts would be empty if op.sandbox is ""
isSandboxUp := false
for host := range sandboxedHosts {
if _, ok := upHosts[host]; ok {
isSandboxUp = true
break
}
}

isMainHostUp := false
for host := range mainClusterHosts {
if _, ok := upHosts[host]; ok {
isMainHostUp = true
break
}
}

// If all sandboxed hosts are down, DB is down for the given sandbox
if !isSandboxUp && op.sandbox != "" {
op.logger.Info("all hosts in the sandbox: " + op.sandbox + " are down")
return true
}
if !isMainHostUp && op.mainCluster {
op.logger.Info("all hosts in the main cluster are down")
return true
}
return false
}
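The decision above restated as a hypothetical pure function over plain sets, to make the three "treat the database as down" cases easier to see: no UP hosts at all, the requested sandbox entirely down, or the main cluster entirely down. Names and sample data are illustrative only.

package main

import "fmt"

// dbConsideredDown reports whether the flow should treat the database as
// already down for the requested scope, mirroring the checks above.
func dbConsideredDown(sandbox string, mainCluster bool,
	sandboxedHosts map[string]string, mainClusterHosts map[string]struct{},
	upHosts map[string]bool) bool {
	// No UP hosts anywhere: nothing is running.
	if len(upHosts) == 0 {
		return true
	}
	// Is any host of the requested sandbox UP?
	sandboxUp := false
	for host := range sandboxedHosts {
		if upHosts[host] {
			sandboxUp = true
			break
		}
	}
	// Is any main-cluster host UP?
	mainUp := false
	for host := range mainClusterHosts {
		if upHosts[host] {
			mainUp = true
			break
		}
	}
	if sandbox != "" && !sandboxUp {
		return true // the whole sandbox is down
	}
	if mainCluster && !mainUp {
		return true // the whole main cluster is down
	}
	return false
}

func main() {
	upHosts := map[string]bool{"10.0.0.1": true}
	sandboxed := map[string]string{"10.0.0.2": "DOWN"}
	mainHosts := map[string]struct{}{"10.0.0.1": {}}
	fmt.Println(dbConsideredDown("sand1", false, sandboxed, mainHosts, upHosts)) // true: sandbox has no UP host
	fmt.Println(dbConsideredDown("", true, sandboxed, mainHosts, upHosts))       // false: main cluster still UP
}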

func (op *httpsCheckRunningDBOp) execute(execContext *opEngineExecContext) error {
op.logger.Info("Execute() called", "opType", op.opType)
switch op.opType {
33 changes: 17 additions & 16 deletions vclusterops/https_check_subcluster_sandbox_op.go
@@ -88,8 +88,8 @@ type scResps struct {
func (op *httpsCheckSubclusterSandboxOp) processResult(execContext *opEngineExecContext) error {
var allErrs error
keysToRemove := make(map[string]struct{})
existingSandboxedHosts := make(map[string]struct{})
mainClusterHosts := make(map[string]struct{})
existingSandboxedHosts := make(map[string]string)
mainClusterHosts := make(map[string]string)

for host, result := range op.clusterHTTPRequest.ResultCollection {
op.logResponse(host, result)
@@ -136,47 +136,48 @@ func (op *httpsCheckSubclusterSandboxOp) processResult(execContext *opEngineExec

// Process sandboxing info
for _, scInfo := range subclusterResp.SCInfoList {
mainHosts, removeHosts, sandboxedHosts := op.processScInfo(scInfo, execContext)
mainHosts, sandboxedHosts, removeHosts := op.processScInfo(scInfo, execContext)
// Accumulate main cluster hosts, hosts to be removed
// and hosts that are sandboxed
for _, host := range mainHosts {
mainClusterHosts[host] = struct{}{}
for host, sb := range mainHosts {
mainClusterHosts[host] = sb
}
for h := range removeHosts {
keysToRemove[h] = struct{}{}
}
for h := range sandboxedHosts {
existingSandboxedHosts[h] = struct{}{}
for h, sb := range sandboxedHosts {
existingSandboxedHosts[h] = sb
}
}
}

// Use updated scInfo
for host := range existingSandboxedHosts {
for host, sb := range existingSandboxedHosts {
// Just need one up host from the existing sandbox
// This will be used to add new subcluster to an existing sandbox
execContext.sandboxingHosts = append(execContext.sandboxingHosts, host)
execContext.upHostsToSandboxes[host] = sb
break
}

for host := range mainClusterHosts {
for host, sb := range mainClusterHosts {
if _, exists := keysToRemove[host]; !exists {
// Just one up host from main cluster
execContext.sandboxingHosts = append(execContext.sandboxingHosts, host)
execContext.upHostsToSandboxes[host] = sb
break
}
}
return allErrs
}
func (op *httpsCheckSubclusterSandboxOp) processScInfo(scInfo subclusterSandboxInfo,
execContext *opEngineExecContext) (mainClusterHosts []string, keysToRemove, existingSandboxedHosts map[string]struct{}) {
execContext *opEngineExecContext) (mainClusterHosts, existingSandboxedHosts map[string]string, keysToRemove map[string]struct{}) {
keysToRemove = make(map[string]struct{})
mainClusterHosts = make(map[string]string)
for host, sc := range execContext.upScInfo {
if scInfo.Sandbox != "" && scInfo.SCName == sc {
keysToRemove, existingSandboxedHosts = op.processSandboxedSCInfo(scInfo, sc, host)
} else {
if scInfo.SCName == sc {
mainClusterHosts = append(mainClusterHosts, host)
mainClusterHosts[host] = scInfo.Sandbox
}
// We do not want a host from the sc to be sandboxed to be the initiator
if sc == op.ScToSandbox {
@@ -188,16 +189,16 @@ func (op *httpsCheckSubclusterSandboxOp) processScInfo(scInfo subclusterSandboxI
}

func (op *httpsCheckSubclusterSandboxOp) processSandboxedSCInfo(scInfo subclusterSandboxInfo,
sc, host string) (keysToRemove, existingSandboxedHosts map[string]struct{}) {
sc, host string) (keysToRemove map[string]struct{}, existingSandboxedHosts map[string]string) {
keysToRemove = make(map[string]struct{})
existingSandboxedHosts = make(map[string]struct{})
existingSandboxedHosts = make(map[string]string)
if scInfo.Sandbox != op.Sandbox {
op.logger.Info("subcluster " + sc + " is sandboxed")
if scInfo.SCName == sc {
keysToRemove[host] = struct{}{}
}
} else {
existingSandboxedHosts[host] = struct{}{}
existingSandboxedHosts[host] = scInfo.Sandbox
}
return
}
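A hypothetical, self-contained sketch of the initiator selection at the end of processResult above: one UP host from the existing sandbox and one main-cluster host that is not scheduled for removal, each mapped to its sandbox name ("" meaning the main cluster). The helper name and addresses are invented.

package main

import "fmt"

// pickInitiators chooses one host from the existing sandbox and one
// main-cluster host that is not scheduled for removal, mapping each chosen
// host to its sandbox name, as the op above does.
func pickInitiators(sandboxed, mainCluster map[string]string,
	keysToRemove map[string]struct{}) map[string]string {
	chosen := make(map[string]string) // host -> sandbox
	for host, sb := range sandboxed {
		chosen[host] = sb // one UP host from the existing sandbox is enough
		break
	}
	for host, sb := range mainCluster {
		if _, skip := keysToRemove[host]; !skip {
			chosen[host] = sb // one UP main-cluster host is enough
			break
		}
	}
	return chosen
}

func main() {
	sandboxed := map[string]string{"10.0.0.5": "sand1"}
	mainCluster := map[string]string{"10.0.0.1": "", "10.0.0.2": ""}
	remove := map[string]struct{}{"10.0.0.1": {}}
	fmt.Println(pickInitiators(sandboxed, mainCluster, remove))
}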
