Skip to content
This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

Fixed Hyper-V VM remaining in invalid state #4012

Merged
merged 1 commit into from
Mar 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion golem/docker/commands/docker_machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class DockerMachineCommandHandler(DockerCommandHandler):
start=['docker-machine', '--native-ssh', 'start'],
stop=['docker-machine', '--native-ssh', 'stop'],
active=['docker-machine', '--native-ssh', 'active'],
list=['docker-machine', '--native-ssh', 'ls', '-q'],
# DON'T use the '-q' option. It doesn't list VMs in invalid state
list=['docker-machine', '--native-ssh', 'ls'],
env=['docker-machine', '--native-ssh', 'env'],
status=['docker-machine', '--native-ssh', 'status'],
inspect=['docker-machine', '--native-ssh', 'inspect'],
Expand Down
6 changes: 5 additions & 1 deletion golem/docker/hypervisor/docker_machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,16 @@ def restart_ctx(self, name: Optional[str] = None):
@property
def vms(self):
try:
# DON'T use the '-q' option. It doesn't list VMs in invalid state
output = self.command('list')
except subprocess.CalledProcessError as e:
logger.warning("DockerMachine: failed to list VMs: %r", e)
else:
if output:
return [i.strip() for i in output.split("\n") if i]
# Skip first line (header) and last (empty)
lines = output.split('\n')[1:-1]
# Get the first word of each line
return [l.strip().split()[0] for l in lines]
return []

@property
Expand Down
24 changes: 19 additions & 5 deletions golem/docker/hypervisor/hyperv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
from pathlib import Path
import subprocess
import time
from typing import Any, ClassVar, Dict, Iterable, List, Optional

from os_win.constants import HOST_SHUTDOWN_ACTION_SAVE, \
Expand Down Expand Up @@ -90,6 +91,7 @@ class HyperVHypervisor(DockerMachineHypervisor):
VOLUME_SIZE = "5000" # = 5GB; default was 20GB
VOLUME_DRIVER = "cifs"
SMB_PORT = "445"
REMOVE_VM_RETRY_LIMIT = 10

SCRIPTS_PATH = os.path.join(get_golem_path(), 'scripts', 'docker')
GET_VSWITCH_SCRIPT_PATH = \
Expand Down Expand Up @@ -232,13 +234,25 @@ def _parse_create_params(
def _failed_to_create(self, vm_name: Optional[str] = None):
name = vm_name or self._vm_name
logger.error(
f'{ self.DRIVER_NAME}: VM failed to create, this can be '
f'{self.DRIVER_NAME}: VM failed to create, this can be '
'caused by insufficient RAM or HD free on the host machine')
try:
self.command('rm', name, args=['-f'])
except subprocess.CalledProcessError:

# Removing the VM sometimes fails so let's try a few times
for _ in range(self.REMOVE_VM_RETRY_LIMIT):
if name not in self.vms:
break

try:
self.command('rm', name, args=['-f'])
except subprocess.CalledProcessError:
logger.warning(f'{self.DRIVER_NAME}: Attempt to remove '
f'machine "{name}" failed')
time.sleep(0.1)

else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this always trigger on the last iteration?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was the idea. If all the iterations passed then we assume that removing the VM failed. If it succeeded then break would be triggered and else clause wouldn't be executed.

# Log error if all attempts failed
logger.error(
f'{ self.DRIVER_NAME}: Failed to clean up a (possible) '
f'{self.DRIVER_NAME}: Failed to clean up a (possible) '
'corrupt machine, please run: '
f'`docker-machine rm -y -f {name}`')

Expand Down