From a05bf6de74d4677f446e39bad07365534afcd8b7 Mon Sep 17 00:00:00 2001 From: CJ_Clippy Date: Sun, 12 Oct 2025 14:41:45 -0800 Subject: [PATCH] use worker placement constraints --- ansible/ansible.cfg | 3 +- ansible/inventory/terraform.yml | 2 +- ansible/requirements.yml | 4 +- ansible/roles/common/tasks/main.yml | 22 ++--- ansible/roles/docker/tasks/docker.yml | 52 ------------ ansible/roles/docker/tasks/main.yml | 5 -- ansible/roles/swarm/tasks/manager.yml | 16 ++++ ansible/site.yml | 7 +- services/our/compose.production.yaml | 3 + terraform/.terraform.lock.hcl | 40 ++++----- terraform/main.tf | 116 +++++++++++++++++--------- 11 files changed, 131 insertions(+), 139 deletions(-) delete mode 100644 ansible/roles/docker/tasks/docker.yml delete mode 100644 ansible/roles/docker/tasks/main.yml diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index 1ab5ce6..10880cd 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -1,7 +1,6 @@ [defaults] - inventory = ./inventory/terraform.yml private_key_file = ~/.ssh/futureporn2025 remote_user = root interpreter_python=auto_silent -nocows = 1 \ No newline at end of file +nocows = 1 diff --git a/ansible/inventory/terraform.yml b/ansible/inventory/terraform.yml index 05cd8b8..1b0e6e3 100644 --- a/ansible/inventory/terraform.yml +++ b/ansible/inventory/terraform.yml @@ -1,4 +1,4 @@ plugin: cloud.terraform.terraform_provider project_path: ../terraform -# Terraform binary (available in the $PATH) or full path to the binary. binary_path: tofu + diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 197e362..137b76a 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -3,9 +3,7 @@ collections: - name: cloud.terraform - name: community.docker - name: community.general - - name: gluster.gluster + roles: - name: nvjacobo.caddy - name: geerlingguy.nodejs - - name: artis3n.tailscale - - name: bpbradley.komodo \ No newline at end of file diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml index b899195..3cef13b 100644 --- a/ansible/roles/common/tasks/main.yml +++ b/ansible/roles/common/tasks/main.yml @@ -39,21 +39,17 @@ mode: "0644" remote_src: true -- name: Download lazydocker installer - ansible.builtin.get_url: - url: https://raw.githubusercontent.com/jesseduffield/lazydocker/master/scripts/install_update_linux.sh - dest: /tmp/lazydocker.sh - mode: "0755" - -- name: Install lazydocker - environment: - DIR: /usr/local/bin - ansible.builtin.command: /tmp/lazydocker.sh - args: - creates: /usr/local/bin/lazydocker - - name: Allow UDP ports 60000-61000 for mosh community.general.ufw: rule: allow port: "60000:61000" proto: udp + +- name: Install ansible module dependencies + ansible.builtin.pip: + name: "{{ item }}" + state: present + loop: + - jsondiff + - pyyaml + - docker diff --git a/ansible/roles/docker/tasks/docker.yml b/ansible/roles/docker/tasks/docker.yml deleted file mode 100644 index dbd1704..0000000 --- a/ansible/roles/docker/tasks/docker.yml +++ /dev/null @@ -1,52 +0,0 @@ ---- -- name: Ensure prerequisites are installed - become: true - ansible.builtin.apt: - name: - - ca-certificates - - curl - state: present - update_cache: true - -- name: Ensure /etc/apt/keyrings directory exists - become: true - ansible.builtin.file: - path: /etc/apt/keyrings - state: directory - mode: "0755" - -- name: Download Docker GPG key - become: true - ansible.builtin.get_url: - url: https://download.docker.com/linux/ubuntu/gpg - dest: /etc/apt/keyrings/docker.asc - mode: "0644" - -- name: Add 
Docker APT repository - become: true - ansible.builtin.apt_repository: - repo: "deb [arch={{ ansible_architecture }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_lsb.codename }} stable" - state: present - filename: docker - update_cache: true - -- name: Install docker - become: true - ansible.builtin.apt: - name: - - docker-ce - - docker-ce-cli - - containerd.io - - docker-buildx-plugin - - docker-compose-plugin - state: present - update_cache: true - -- name: Install docker ansible module dependencies - ansible.builtin.pip: - name: "{{ item }}" - state: present - loop: - - jsondiff - - pyyaml - - docker diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml deleted file mode 100644 index dc33207..0000000 --- a/ansible/roles/docker/tasks/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- - -- name: Install docker - ansible.builtin.include_tasks: - file: docker.yml diff --git a/ansible/roles/swarm/tasks/manager.yml b/ansible/roles/swarm/tasks/manager.yml index c5f83f6..bb27630 100644 --- a/ansible/roles/swarm/tasks/manager.yml +++ b/ansible/roles/swarm/tasks/manager.yml @@ -22,3 +22,19 @@ - name: Get manager join token ansible.builtin.set_fact: swarm_manager_join_token: "{{ swarm_create.swarm_facts.JoinTokens.Manager }}" + +- name: Get info on all swarm nodes + community.docker.docker_node_info: + register: swarm_node_infos + +- name: Debug swarm node info + debug: + var: swarm_node_infos + +- name: Set node labels based on hostvars + community.docker.docker_node: + hostname: "{{ item }}" + labels: + capabilities_gpu: "{{ hostvars[item].capabilities_gpu | default('false') }}" + capabilities_dedicated_cpu: "{{ hostvars[item].capabilities_dedicated_cpu | default('false') }}" + loop: "{{ groups['swarm'] }}" diff --git a/ansible/site.yml b/ansible/site.yml index 5e8044d..9822b90 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -1,7 +1,7 @@ --- - name: Bootstrap - hosts: all + hosts: swarm gather_facts: false ## required because ansible_host may not have python check_mode: false become: false @@ -9,13 +9,13 @@ - bootstrap - name: Assert common dependencies - hosts: all + hosts: swarm gather_facts: true check_mode: false become: true roles: - common - - docker + - name: Set up docker swarm hosts: swarm @@ -71,7 +71,6 @@ - ipfs - # - name: Install Capture instance # hosts: capture # gather_facts: true diff --git a/services/our/compose.production.yaml b/services/our/compose.production.yaml index a0e5cd5..291fb0c 100644 --- a/services/our/compose.production.yaml +++ b/services/our/compose.production.yaml @@ -39,6 +39,9 @@ services: limits: cpus: "0.5" memory: 2G + placement: + constraints: + - node.labels.capabilities_dedicated_cpu == true environment: CACHE_ROOT: /mnt/vfs/futureporn/our diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl index 9a92f7c..be9a412 100644 --- a/terraform/.terraform.lock.hcl +++ b/terraform/.terraform.lock.hcl @@ -25,28 +25,28 @@ provider "registry.opentofu.org/ansible/ansible" { } provider "registry.opentofu.org/bunnyway/bunnynet" { - version = "0.7.5" + version = "0.10.4" hashes = [ - "h1:RV1mRPnZqgwzQXXA+MeU7wwyRxpq8UkdyH9asQQhByU=", - "zh:06306f778155271c5789d4e334a2db37d4f53b2e3edd3ac55c6de7b21a59bc0f", - "zh:0eb4ef1871cc48fddfe66c6c0816b24c358b5616cb106acd89837985e9abc83f", - "zh:10aa641396252f8ee6f76c6f248d8f30c3415f9230b3a486ad4ff06badfcd292", - "zh:21098155a58c9461e2edfcf6cd7e24982bd0503a15ecc525382e78f09e19447c", - 
"zh:3fb5007a95d303a9e1ece1a4605824a454488ff55ecd1138ac7c268a83f19e47", - "zh:4dc4fb634039f78c054b63965a468c3bd55066b9531af7900297732f2a3639a2", - "zh:52f59d797020284971a1f4b7f5079c99db7aa42dac2ec01c3dced69896295342", - "zh:61c83423bcbe3af6eaa6cdef56525a47ddc5109f90a2a5b58d73cf2b390195ca", - "zh:70d82268c8f7478013fa6c3eb90da777a590dab0e24dbd07807492199722e85e", - "zh:7509c60199aa5c7b2cef91a8b46c38336d7ab3f86e3fadbaf4e9ffc1095551a3", - "zh:81264d9480f8344e963556acc8b6f9bf90731759c5f565abe7200b69de591f0f", - "zh:85c4ba0cdf98b551235ae062f2d85eda5291d1ebad227f8ba5da48515d751a35", + "h1:AsUZ28mqc+/DZA97uzPfbyR5tlMoP2+8OeIJOS8EOfs=", + "zh:03259f0ce1fb4089f0aee1e9594c7e8149da0866a0c4214377c94a29d96acc7e", + "zh:077eaa0086d496856217073b0ead28ac36eeb8c46e2ce9d34b40bedaf473030e", + "zh:14ea2781b3664611839ee53627500be07f90810db9ecc0adec5f24d4e848a030", + "zh:1f4d1b09d9cb3ca303c8d956c38e8c82c9413ec1380e2a7342c7713847a30617", + "zh:26e5ae7c47359d09a9736b7906e364f8ad5627e8cc6e03900cde6c22c9305343", + "zh:276008be540327190ca1cd5e038e6d94160bbe682497f63a62faeb659b0ba8cf", + "zh:34d148cbe660707bfb801e7a1b86097bbab20e3d78ec611affb66c70417d31ec", + "zh:4dabf1ac6dad637807dbe69805d3fcfddf9ce67b2855a7dbf77492602e9efbe9", + "zh:57726a539c6285bb7bcd1a8eecc64270d196b43b60c1b02c11320cc96f2999f6", + "zh:61b11c5174f41736422f32650ba4c0598d8c6676c392518dc3b675e2e79a5fc5", + "zh:7b39827ac4f5bc0de5e1312c3733f959798a6bda54d01b68c0148f188a94e607", + "zh:82fb99aa52ce35d1b0f8aaf76e1e308b08ec689bfc4a710b3e147cee6700dfa8", "zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f", - "zh:8ef1587c0337a0c5f65438f81801189a5bc17f51d3b94799c1cac5e8ff178509", - "zh:971598dbee00d3e6e81d89c02aa097002bcc02ba074cb007917f148dda9f9b72", - "zh:a74055059bb15d58f6c092ce4ed1a7b114d6e8f7234a5299e4b32da2b87f334b", - "zh:abce1a748c7864669bc28c1e805bcebc60ccb258954644166e516b2adeac417a", - "zh:d039eaf0f39ab7cc0e63d28e5326f028e614979c2b6edbeb6d4d1ef349413a29", - "zh:d16ce72e16bc5a764b1caff5265880f87ec77fd8ac9b3edd1801b9d12ba65335", + "zh:8ce1c66f52c0d2e8c440d6ef844f316ab3274ed1f9b6db982f0f76033d7426cd", + "zh:ad6a6a90c6e5fdd398881186316b1038a8765dabe9988e9c4bd8c81836d6af82", + "zh:bfc5f959ba6dbba7ed3e6db35e87d054b864bc294ff0816224b857021008da4a", + "zh:e575d8324d1b75aef90e4dc987b845aadafbe92f6960f9daae1e28db230a425d", + "zh:f8de4f97830bac035d960e61c9241d144f61b2eea1e085dd779007a129cc9620", + "zh:fb54e316bcbb073fda89517551b58dd8841c96ad94d4f765928ca44ece0ab782", ] } diff --git a/terraform/main.tf b/terraform/main.tf index 3848bed..71f29f6 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -165,7 +165,7 @@ resource "bunnynet_dns_record" "www_future_porn" { # vultr instance for running our app resource "vultr_instance" "swarm_node" { - count = 2 + count = 1 hostname = "swarm-node-${count.index}" plan = "vc2-2c-4gb" region = "ord" @@ -174,7 +174,26 @@ resource "vultr_instance" "swarm_node" { os_id = 1743 enable_ipv6 = true label = "swarm node ${count.index}" - tags = ["fp", "swarm", "server", "tofu"] + tags = ["fp", "swarm", "tofu"] + ssh_key_ids = [local.envs.VULTR_SSH_KEY_ID] + vpc_ids = [ + vultr_vpc.futureporn_vpc.id + ] + user_data = base64encode(var.vps_user_data) +} + +# vultr instance with a dedicated CPU +resource "vultr_instance" "swarm_transcode_node" { + count = 1 + hostname = "swarm-transcode-node-${count.index}" + plan = "voc-c-2c-4gb-50s" # CPU optimized, 2 vCPU, 4GB RAM, 50GB NVMe + region = "ord" + backups = "disabled" + ddos_protection = "false" + os_id = 1743 + enable_ipv6 = true + label = "swarm transcode node 
${count.index}" + tags = ["fp", "swarm", "transcode", "tofu"] ssh_key_ids = [local.envs.VULTR_SSH_KEY_ID] vpc_ids = [ vultr_vpc.futureporn_vpc.id @@ -204,26 +223,23 @@ resource "vultr_instance" "swarm_node" { -# vultr instance with a GPU. experimental. -# resource "vultr_instance" "capture_vps" { -# count = 0 -# hostname = "fp-cap-${count.index}" -# plan = "vcg-a16-2c-8g-2vram" -# region = "ord" -# backups = "disabled" -# ddos_protection = "false" -# # os_id = 1743 -# image_id = "ubuntu-xfce" -# app_variables = { -# desktopuser = "cj_clippy" -# } -# enable_ipv6 = true -# vpc_ids = [vultr_vpc.futureporn_vpc.id] -# label = "fp capture ${count.index}" -# tags = ["futureporn", "capture"] -# ssh_key_ids = [local.envs.VULTR_SSH_KEY_ID] -# user_data = base64encode(var.vps_user_data) -# } +# Vultr instance with a GPU. +# Caveat: No VFS available on GPU instances! @see https://docs.vultr.com/products/cloud-storage/vultr-file-system/attach-instances (comments) +resource "vultr_instance" "swarm_gpu_node" { + count = 0 + hostname = "swarm-gpu-node-${count.index}" + plan = "vcg-a16-2c-8g-2vram" # 1/8 NVIDIA A16, 2 VCPU, 8 GB RAM, 2 GB VRAM + region = "ord" + backups = "disabled" + ddos_protection = "false" + os_id = 1743 + enable_ipv6 = true + vpc_ids = [vultr_vpc.futureporn_vpc.id] + label = "swarm gpu node ${count.index}" + tags = ["fp", "swarm", "server", "tofu", "gpu"] + ssh_key_ids = [local.envs.VULTR_SSH_KEY_ID] + user_data = base64encode(var.vps_user_data) +} # This is our ipfs node with a really big dick, I mean disk @@ -306,6 +322,36 @@ resource "ansible_host" "swarm_node" { } } +resource "ansible_host" "swarm_gpu_node" { + for_each = { for idx, host in vultr_instance.swarm_gpu_node : idx => host } + name = each.value.hostname + groups = ["swarm"] + variables = { + ansible_host = each.value.main_ip + internal_ip = each.value.internal_ip + vultr_instance_id = each.value.id + vultr_vfs_storage_id = vultr_virtual_file_system_storage.vfs.id + swarm_enable_manager = false + swarm_enable_worker = true + capabilities_gpu = true + } +} + +resource "ansible_host" "swarm_transcode_node" { + for_each = { for idx, host in vultr_instance.swarm_transcode_node : idx => host } + name = each.value.hostname + groups = ["swarm"] + variables = { + ansible_host = each.value.main_ip + internal_ip = each.value.internal_ip + vultr_instance_id = each.value.id + vultr_vfs_storage_id = vultr_virtual_file_system_storage.vfs.id + swarm_enable_manager = false + swarm_enable_worker = true + capabilities_gpu = false + capabilities_dedicated_cpu = true + } +} # resource "ansible_host" "tracker" { @@ -319,18 +365,7 @@ resource "ansible_host" "swarm_node" { # } # } -resource "ansible_host" "swarm" { - for_each = { for idx, host in vultr_instance.swarm_node : idx => host } - name = each.value.hostname - groups = ["swarm"] - variables = { - ansible_host = each.value.main_ip - internal_ip = each.value.internal_ip - vultr_instance_id = each.value.id - vultr_vfs_storage_id = vultr_virtual_file_system_storage.vfs.id - } -} resource "vultr_virtual_file_system_storage" "vfs" { label = "fp-vfs-cache" @@ -338,16 +373,19 @@ resource "vultr_virtual_file_system_storage" "vfs" { region = "ord" tags = ["our", "vfs"] - attached_instances = vultr_instance.swarm_node[*].id + # Merge both instance lists and remove any nulls + # Note: GPU instances cannot be added to VFS! 
:( + attached_instances = concat( + vultr_instance.swarm_node[*].id, + vultr_instance.swarm_transcode_node[*].id + ) + + + } -# resource "ansible_group" "capture" { -# name = "capture" -# } - - resource "ansible_group" "swarm" { name = "swarm" }
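
A note on the new constraint (not part of the patch itself): Swarm compares node label values as strings, so the "node.labels.capabilities_dedicated_cpu == true" constraint added to compose.production.yaml only matches if the docker_node task in manager.yml stored the literal string "true" on the transcode node. Below is a minimal verification sketch, assuming the hostname swarm-transcode-node-0 produced by vultr_instance.swarm_transcode_node above and the collections already listed in requirements.yml; the task names and register variable are illustrative, adjust the node name if the count or naming scheme changes.

    - name: Look up the transcode node (verification sketch, not in the patch)
      community.docker.docker_node_info:
        name: swarm-transcode-node-0   # assumed hostname from the Terraform above
      register: transcode_node_info

    - name: Assert the label the compose placement constraint expects
      ansible.builtin.assert:
        that:
          - transcode_node_info.nodes[0].Spec.Labels.capabilities_dedicated_cpu == 'true'
        fail_msg: >-
          capabilities_dedicated_cpu label is missing or not "true"; a service
          constrained on it would sit unscheduled with a "no suitable node" error.

If the assert fails, running "docker node inspect swarm-transcode-node-0" on a manager shows which labels were actually applied.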