diff --git a/.github/workflows/load-test-run.yml b/.github/workflows/load-test-run.yml
new file mode 100644
index 0000000..6ccb7f4
--- /dev/null
+++ b/.github/workflows/load-test-run.yml
@@ -0,0 +1,113 @@
+name: Load Test Run
+
+# Manually dispatched wrapper around scripts/load_test/run_k6.sh.
+on:
+  workflow_dispatch:
+    inputs:
+      vus:
+        description: "k6 VUs"
+        required: true
+        default: "10"
+        type: string
+      iterations:
+        description: "k6 iterations per VU"
+        required: true
+        default: "10"
+        type: string
+      max_duration:
+        description: "k6 max duration"
+        required: true
+        default: "15m"
+        type: string
+      target_base_url:
+        description: "Target base URL. Empty uses Terraform default."
+        required: false
+        default: ""
+        type: string
+      prometheus_remote_write_url:
+        description: "Prometheus remote-write URL. Empty uses Terraform default."
+        required: false
+        default: ""
+        type: string
+      destroy_runner:
+        description: "Destroy the k6 load generator after this run"
+        required: true
+        default: true
+        type: boolean
+      rebuild_k6:
+        description: "Rebuild the k6 binary before running"
+        required: true
+        default: false
+        type: boolean
+
+permissions:
+  id-token: write
+  contents: read
+
+# Same group as the Start/Stop workflows: runs queue up instead of overlapping.
+concurrency:
+  group: load-test-environment
+  cancel-in-progress: false
+
+env:
+  TF_VERSION: "1.10.5"
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          token: ${{ secrets.GH_PAT }}
+          persist-credentials: false
+
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ vars.AWS_LOAD_TEST_ROLE_ARN }}
+          aws-region: ap-northeast-2
+
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: ${{ env.TF_VERSION }}
+          terraform_wrapper: false
+
+      - name: Install jq
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y jq
+
+      - name: Run k6 on load generator
+        # Every input reaches the script through env, so the shell text contains no ${{ }} expansion.
+        env:
+          VUS: ${{ inputs.vus }}
+          ITERATIONS: ${{ inputs.iterations }}
+          MAX_DURATION: ${{ inputs.max_duration }}
+          TARGET_BASE_URL: ${{ inputs.target_base_url }}
+          PROMETHEUS_REMOTE_WRITE_URL: ${{ inputs.prometheus_remote_write_url }}
+          DESTROY_RUNNER: ${{ inputs.destroy_runner }}
+          REBUILD_K6: ${{ inputs.rebuild_k6 }}
+        run: |
+          args=(
+            --vus "$VUS"
+            --iterations "$ITERATIONS"
+            --max-duration "$MAX_DURATION"
+          )
+
+          if [ -n "$TARGET_BASE_URL" ]; then
+            args+=(--target-base-url "$TARGET_BASE_URL")
+          fi
+
+          if [ -n "$PROMETHEUS_REMOTE_WRITE_URL" ]; then
+            args+=(--prometheus-remote-write-url "$PROMETHEUS_REMOTE_WRITE_URL")
+          fi
+
+          if [ "$DESTROY_RUNNER" != "true" ]; then
+            args+=(--skip-runner-destroy)
+          fi
+
+          if [ "$REBUILD_K6" = "true" ]; then
+            args+=(--rebuild-k6)
+          fi
+
+          bash scripts/load_test/run_k6.sh "${args[@]}"
diff --git a/.github/workflows/load-test-start.yml b/.github/workflows/load-test-start.yml
new file mode 100644
index 0000000..fd1c160
--- /dev/null
+++ b/.github/workflows/load-test-start.yml
@@ -0,0 +1,69 @@
+name: Load Test Start
+
+# Manually dispatched wrapper around scripts/load_test/start.sh.
+on:
+  workflow_dispatch:
+    inputs:
+      switch_stage_to_loadtest:
+        description: "Restart stage app with dev,loadtest profiles"
+        required: true
+        default: true
+        type: boolean
+
+permissions:
+  id-token: write
+  contents: read
+
+# Same group as the Run/Stop workflows: runs queue up instead of overlapping.
+concurrency:
+  group: load-test-environment
+  cancel-in-progress: false
+
+env:
+  TF_VERSION: "1.10.5"
+
+jobs:
+  start:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          token: ${{ secrets.GH_PAT }}
+          persist-credentials: false
+
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ vars.AWS_LOAD_TEST_ROLE_ARN }}
+          aws-region: ap-northeast-2
+
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: ${{ env.TF_VERSION }}
+          terraform_wrapper: false
+
+      - name: Install jq
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y jq
+
+      - name: Start load test environment
+        # The repository variable and the input are passed via env, never expanded inside the shell text.
+        env:
+          LOAD_GENERATOR_INSTANCE_PROFILE_NAME: ${{ vars.LOAD_GENERATOR_INSTANCE_PROFILE_NAME }}
+          SWITCH_STAGE_TO_LOADTEST: ${{ inputs.switch_stage_to_loadtest }}
+        run: |
+          export TF_VAR_load_generator_instance_type="c7i.large"
+          export TF_VAR_create_load_generator="false"
+
+          if [ -n "$LOAD_GENERATOR_INSTANCE_PROFILE_NAME" ]; then
+            export TF_VAR_load_generator_instance_profile_name="$LOAD_GENERATOR_INSTANCE_PROFILE_NAME"
+          fi
+
+          args=()
+
+          if [ "$SWITCH_STAGE_TO_LOADTEST" = "true" ]; then
+            args+=(--switch-stage-to-loadtest)
+          fi
+
+          bash scripts/load_test/start.sh "${args[@]}"
diff --git a/.github/workflows/load-test-stop.yml b/.github/workflows/load-test-stop.yml
new file mode 100644
index 0000000..d5ceffa
--- /dev/null
+++ b/.github/workflows/load-test-stop.yml
@@ -0,0 +1,71 @@
+name: Load Test Stop
+
+# Manually dispatched wrapper around scripts/load_test/stop.sh.
+on:
+  workflow_dispatch:
+    inputs:
+      restore_stage_dev:
+        description: "Restart stage app with dev profile"
+        required: true
+        default: true
+        type: boolean
+      destroy_rds:
+        description: "Destroy load test Terraform stack"
+        required: true
+        default: true
+        type: boolean
+
+permissions:
+  id-token: write
+  contents: read
+
+# Same group as the Start/Run workflows: runs queue up instead of overlapping.
+concurrency:
+  group: load-test-environment
+  cancel-in-progress: false
+
+env:
+  TF_VERSION: "1.10.5"
+
+jobs:
+  stop:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          token: ${{ secrets.GH_PAT }}
+          persist-credentials: false
+
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ vars.AWS_LOAD_TEST_ROLE_ARN }}
+          aws-region: ap-northeast-2
+
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: ${{ env.TF_VERSION }}
+          terraform_wrapper: false
+
+      - name: Install jq
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y jq
+
+      - name: Stop load test environment
+        # Inputs are passed via env, never expanded inside the shell text.
+        env:
+          RESTORE_STAGE_DEV: ${{ inputs.restore_stage_dev }}
+          DESTROY_RDS: ${{ inputs.destroy_rds }}
+        run: |
+          args=()
+
+          if [ "$RESTORE_STAGE_DEV" = "true" ]; then
+            args+=(--restore-stage-dev)
+          fi
+
+          if [ "$DESTROY_RDS" != "true" ]; then
+            args+=(--skip-terraform-destroy)
+          fi
+
+          bash scripts/load_test/stop.sh "${args[@]}"
diff --git a/config/load-test/k6/createPost.json b/config/load-test/k6/createPost.json
new file mode 100644
index 0000000..e08b0d2
--- /dev/null
+++ b/config/load-test/k6/createPost.json
@@ -0,0 +1,7 @@
+{
+  "boardCode": "FREE",
+  "postCategory": "자유",
+  "title": "수강신청 어떻게 하나요?",
+  "content": "수강신청 방법을 잘 모르겠어요.",
+  "isQuestion": false
+}
diff --git 
a/config/load-test/k6/set_up_xk6.sh b/config/load-test/k6/set_up_xk6.sh
new file mode 100644
index 0000000..b0edaa5
--- /dev/null
+++ b/config/load-test/k6/set_up_xk6.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Installs a pinned Go toolchain and xk6 under BASE_DIR, then builds a k6 binary
+# with the Prometheus remote-write output extension.
+# Re-runnable: the Go tree is replaced and the environment file is rewritten,
+# so repeated runs do not pile up state.
+
+set -euo pipefail
+
+trap 'echo "xk6 setup failed" >&2' ERR
+
+export GO_VERSION=1.25.9
+export XK6_VERSION=v1.4.3
+export XK6_PROMETHEUS_REMOTE_VERSION=v0.5.1
+export BASE_DIR=/home/ubuntu/solid-connection-load-test/k6
+export GOROOT=${BASE_DIR}/go
+export GOPATH=${BASE_DIR}/go-workspace
+# The pinned toolchain goes first so a system-wide `go` cannot shadow it.
+export PATH=$GOROOT/bin:$GOPATH/bin:$PATH
+export XK6_BIN=${GOPATH}/bin/xk6
+export K6_OUT=experimental-prometheus-rw
+export K6_PROMETHEUS_RW_SERVER_URL=${K6_PROMETHEUS_RW_SERVER_URL:-}
+export K6_PROMETHEUS_RW_TREND_STATS="${K6_PROMETHEUS_RW_TREND_STATS:-p(90),p(95),p(99),avg,min,max}"
+
+ENV_FILE=${BASE_DIR}/k6-env.sh
+BASHRC=~/.bashrc
+GO_TARBALL=go${GO_VERSION}.linux-amd64.tar.gz
+
+echo "Create and enter ${BASE_DIR}"
+mkdir -p "$BASE_DIR"
+cd "$BASE_DIR"
+
+# Persist the environment for later shells. The file is overwritten on each run
+# and values are %q-quoted, so a URL containing '&' or '?' survives being sourced.
+{
+  printf 'export BASE_DIR=%q\n' "$BASE_DIR"
+  printf 'export GOROOT=%q\n' "$GOROOT"
+  printf 'export GOPATH=%q\n' "$GOPATH"
+  echo 'export PATH=$GOROOT/bin:$GOPATH/bin:$PATH'
+  printf 'export XK6_BIN=%q\n' "$XK6_BIN"
+  printf 'export K6_OUT=%q\n' "$K6_OUT"
+  printf 'export K6_PROMETHEUS_RW_SERVER_URL=%q\n' "$K6_PROMETHEUS_RW_SERVER_URL"
+  printf 'export K6_PROMETHEUS_RW_TREND_STATS=%q\n' "$K6_PROMETHEUS_RW_TREND_STATS"
+} > "$ENV_FILE"
+
+# Hook the file into ~/.bashrc exactly once, however often this script is re-run.
+source_line=". \"${ENV_FILE}\""
+touch "$BASHRC"
+if ! grep -qxF -- "$source_line" "$BASHRC"; then
+  echo "$source_line" >> "$BASHRC"
+fi
+
+echo "Download Go ${GO_VERSION}"
+# --fail makes an HTTP error exit non-zero instead of saving the error page as the tarball.
+curl --fail --location --remote-name --retry 3 "https://go.dev/dl/${GO_TARBALL}"
+
+echo "Extract Go"
+# Never unpack over an existing Go tree: the Go install docs warn that this breaks the installation.
+rm -rf "$GOROOT"
+tar -xzf "$GO_TARBALL"
+rm "$GO_TARBALL"
+
+echo "Go version: $(go version)"
+
+echo "Install xk6"
+go install "go.k6.io/xk6/cmd/xk6@${XK6_VERSION}"
+
+echo "xk6 installed: ${XK6_BIN}"
+"$XK6_BIN" --help > /dev/null && echo "xk6 executable is available"
+
+echo "Build k6 with Prometheus remote-write output"
+"$XK6_BIN" build --with "github.com/grafana/xk6-output-prometheus-remote@${XK6_PROMETHEUS_REMOTE_VERSION}"
+
+echo "Build complete: $(pwd)/k6"
+ls -lh ./k6
+
+echo "xk6 setup completed"
diff --git a/config/load-test/k6/updatePost.json 
b/config/load-test/k6/updatePost.json
new file mode 100644
index 0000000..660f0a8
--- /dev/null
+++ b/config/load-test/k6/updatePost.json
@@ -0,0 +1,5 @@
+{
+  "postCategory": "자유",
+  "title": "수강신청 어떻게 하나요?",
+  "content": "수강신청 방법을 잘 알겠어요."
+}
diff --git a/config/load-test/k6/whole-user-flow.js b/config/load-test/k6/whole-user-flow.js
new file mode 100644
index 0000000..b0a9166
--- /dev/null
+++ b/config/load-test/k6/whole-user-flow.js
@@ -0,0 +1,422 @@
+import http from 'k6/http';
+import { sleep, check, fail } from 'k6';
+
+// End-to-end k6 scenario: each VU signs in and walks through the university,
+// community (board/post/comment), score and application endpoints every iteration.
+
+// Current time shifted to KST (UTC+9); only used to build the `time` tag below.
+const now = new Date();
+const kstOffset = 9 * 60; // in minutes
+const d = new Date(now.getTime() + kstOffset * 60 * 1000);
+const kst = d.toISOString().slice(0, 16); // "yyyy-mm-ddTHH:MM"
+
+// "mm/dd HH:MM"
+const time = (() => {
+  const [, mm, dd, hh, min] = kst.split(/[-T:]/);
+  return `${mm}/${dd} ${hh}:${min}`;
+})();
+
+const BASE_URL = __ENV.BASE_URL || 'https://api.stage.solid-connection.com';
+const testId = 'whole-user-flow';
+
+// VU count, iterations per VU and max duration come from K6_* env vars, with defaults.
+export const options = {
+  scenarios: {
+    user_flow: {
+      executor: 'per-vu-iterations', // fixed number of iterations per VU
+      vus: Number(__ENV.K6_VUS || 10),
+      iterations: Number(__ENV.K6_ITERATIONS || 10),
+      maxDuration: __ENV.K6_MAX_DURATION || '15m',
+    },
+  },
+  tags: {
+    testid: testId,
+    time: time,
+  },
+};
+
+// Builds request params carrying the bearer token plus the test-wide tags.
+function authHeadersWithTags(token) {
+  return {
+    headers: {
+      Authorization: `Bearer ${token}`,
+      'Content-Type': 'application/json; charset=utf-8',
+    },
+    tags: {
+      ...options.tags,
+      time: time,
+    },
+  };
+}
+
+// Signs in as user<VU>@example.com and returns the access token; a non-200 fails the iteration.
+function login() {
+  // __VU: number of the current VU
+  const email = `user${__VU}@example.com`;
+  const password = 'password';
+
+  const res = http.post(`${BASE_URL}/auth/email/sign-in`, JSON.stringify({
+    email: email,
+    password: password,
+  }), {
+    headers: { 'Content-Type': 'application/json; charset=utf-8' },
+    tags: {
+      name: '/auth/email/sign-in',
+    }
+  });
+  if (res.status !== 200) {
+    fail('로그인 실패');
+  }
+  return res.json('accessToken');
+}
+
+// universities
+function getRecommendedUniversities(auth) {
+  http.get(`${BASE_URL}/universities/recommend`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/universities/recommend',
+    },
+  });
+}
+function likeUniversity(id, auth) {
+  http.post(`${BASE_URL}/universities/${id}/like`, null, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/universities/{id}/like',
+    },
+  });
+}
+function isLikedUniversity(id, auth) {
+  http.get(`${BASE_URL}/universities/${id}/like`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/universities/{id}/like',
+    },
+  });
+}
+function getLikedUniversities(auth) {
+  http.get(`${BASE_URL}/universities/like`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/universities/like',
+    },
+  });
+}
+function cancelLikeUniversity(id, auth) {
+  http.del(`${BASE_URL}/universities/${id}/like`, null, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/universities/{id}/like',
+    },
+  });
+}
+function searchUniversities(params) {
+  return http.get(`${BASE_URL}/universities/search?${params}`, {
+    tags: {
+      name: '/universities/search?{params}',
+    },
+  });
+}
+function getDetailedUniversityInfo(id) {
+  http.get(`${BASE_URL}/universities/${id}`, {
+    tags: {
+      name: '/universities/{id}',
+    },
+  });
+}
+
+// my
+function getMyInfo(auth) {
+  http.get(`${BASE_URL}/my`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/my',
+    },
+  });
+}
+
+// users
+function checkNicknameExists(nickname) {
+  http.get(`${BASE_URL}/users/exists?nickname=${nickname}`, {
+    tags: {
+      name: '/users/exists?nickname={nickname}',
+    },
+  });
+}
+
+// boards
+function getBoards(auth) {
+  http.get(`${BASE_URL}/boards`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/boards',
+    },
+  });
+}
+function getPostsByBoard(boardCode, auth) {
+  http.get(`${BASE_URL}/boards/${boardCode}`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/boards/{boardCode}',
+    },
+  });
+}
+
+// posts
+// Multipart bodies are loaded once in the init context, as open() requires.
+const createPostJson = open('./createPost.json', 'b');
+function createPost(token) {
+  const formData = {
+    postCreateRequest: http.file(createPostJson, 'post.json', 'application/json'),
+  };
+  const res = http.post(`${BASE_URL}/posts`, formData, {
+    headers: {
+      Authorization: `Bearer ${token}`
+    },
+    tags: {
+      testid: testId,
+      time: time,
+      name: '/posts'
+    },
+  });
+  // Validate before use so a failed create aborts here instead of hitting /posts/undefined.
+  return requireId(requireJson(res, 'createPost'), 'createPost');
+}
+const updatePostJson = open('./updatePost.json', 'b');
+function updatePost(postId, token) {
+  const formData = {
+    postUpdateRequest: http.file(updatePostJson, 'post.json', 'application/json'),
+  };
+  http.patch(`${BASE_URL}/posts/${postId}`, formData, {
+    headers: {
+      Authorization: `Bearer ${token}`
+    },
+    tags: {
+      testid: testId,
+      time: time,
+      name: '/posts/{postId}'
+    },
+  });
+}
+function getPostDetail(postId, auth) {
+  http.get(`${BASE_URL}/posts/${postId}`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/posts/{postId}',
+    },
+  });
+}
+function likePost(postId, auth) {
+  http.post(`${BASE_URL}/posts/${postId}/like`, null, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/posts/{postId}/like',
+    },
+  });
+}
+function cancelLikePost(postId, auth) {
+  http.del(`${BASE_URL}/posts/${postId}/like`, null, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/posts/{postId}/like',
+    },
+  });
+}
+function deletePost(postId, auth) {
+  http.del(`${BASE_URL}/posts/${postId}`, null, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/posts/{postId}',
+    },
+  });
+}
+
+// comments
+function createComment(postId, auth) {
+  const res = http.post(
+    `${BASE_URL}/comments`,
+    JSON.stringify({ postId, content: '댓글', parentId: null }),
+    {
+      ...auth,
+      tags: {
+        ...auth.tags,
+        name: '/comments',
+      },
+    });
+  // Validate before use so a failed create aborts here instead of hitting /comments/undefined.
+  return requireId(requireJson(res, 'createComment'), 'createComment');
+}
+function updateComment(commentId, auth) {
+  http.patch(
+    `${BASE_URL}/comments/${commentId}`,
+    JSON.stringify({ content: '댓글 수정' }),
+    {
+      ...auth,
+      tags: {
+        ...auth.tags,
+        name: '/comments/{commentId}',
+      },
+    }
+  );
+}
+function deleteComment(commentId, auth) {
+  http.del(`${BASE_URL}/comments/${commentId}`, null, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/comments/{commentId}',
+    },
+  });
+}
+
+// scores
+function getLanguageTests(auth) {
+  return http.get(`${BASE_URL}/scores/language-tests`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/scores/language-tests',
+    },
+  });
+}
+function getGPAs(auth) {
+  return http.get(`${BASE_URL}/scores/gpas`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/scores/gpas',
+    },
+  });
+}
+
+// Returns the parsed JSON body; fails the iteration on a non-2xx status or invalid JSON.
+function requireJson(res, name) {
+  if (res.status < 200 || res.status >= 300) {
+    fail(`${name} failed with status ${res.status}: ${res.body}`);
+  }
+
+  try {
+    return res.json();
+  } catch (error) {
+    fail(`${name} returned invalid JSON: ${error.message}`);
+  }
+}
+
+// Fails unless value is a non-empty array; returns it unchanged.
+function requireArray(value, name, status) {
+  if (!Array.isArray(value) || value.length === 0) {
+    fail(`${name} response is empty or invalid (status: ${status})`);
+  }
+  return value;
+}
+
+// Returns value.id; fails when value or its id is missing.
+function requireId(value, name) {
+  if (!value || value.id === undefined || value.id === null) {
+    fail(`${name} response does not contain id`);
+  }
+  return value.id;
+}
+
+// applications
+function apply(gpaScoreId, languageTestScoreId, universityId, auth) {
+  http.post(`${BASE_URL}/applications`, JSON.stringify({
+    gpaScoreId: gpaScoreId,
+    languageTestScoreId: languageTestScoreId,
+    universityChoiceRequest: {
+      firstChoiceUniversityId: universityId,
+      secondChoiceUniversityId: null,
+      thirdChoiceUniversityId: null
+    },
+  }), {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/applications',
+    },
+  });
+}
+
+function getCompetitors(auth) {
+  http.get(`${BASE_URL}/applications/competitors`, {
+    ...auth,
+    tags: {
+      ...auth.tags,
+      name: '/applications/competitors',
+    },
+  });
+}
+
+// One iteration = one full user journey; a fail() in any helper aborts the current iteration.
+export default function () {
+  checkNicknameExists(encodeURIComponent('loadtest-user'));
+  const token = login();
+  const auth = authHeadersWithTags(token);
+
+
+  getRecommendedUniversities(auth);
+
+  const uniSearchRes = searchUniversities(''); // pick a random id among the universities open this term
+  const uniSearchBody = requireJson(uniSearchRes, 'searchUniversities');
+  const uniList = requireArray(uniSearchBody, 'searchUniversities', uniSearchRes.status);
+  const universityId = requireId(uniList[Math.floor(Math.random() * uniList.length)], 'universities/search item');
+
+  likeUniversity(universityId, auth);
+  isLikedUniversity(universityId, auth);
+  getLikedUniversities(auth);
+  cancelLikeUniversity(universityId, auth);
+  getDetailedUniversityInfo(universityId);
+
+  getMyInfo(auth);
+
+  getBoards(auth);
+  getPostsByBoard('FREE', auth);
+
+  const postId = createPost(token);
+  updatePost(postId, token);
+  getPostDetail(postId, auth);
+  likePost(postId, auth);
+  cancelLikePost(postId, auth);
+
+  const commentId = createComment(postId, auth);
+  updateComment(commentId, auth);
+  deleteComment(commentId, auth);
+
+  deletePost(postId, auth);
+
+  const langRes = getLanguageTests(auth);
+  const langBody = requireJson(langRes, 'getLanguageTests');
+  const langList = requireArray(
+    langBody && langBody.languageTestScoreStatusResponseList,
+    'getLanguageTests.languageTestScoreStatusResponseList',
+    langRes.status
+  );
+  const languageTestScoreId = requireId(langList[0], 'scores/language-tests item');
+
+  const gpaRes = getGPAs(auth);
+  const gpaBody = requireJson(gpaRes, 'getGPAs');
+  const gpaList = requireArray(
+    gpaBody && gpaBody.gpaScoreStatusResponseList,
+    'getGPAs.gpaScoreStatusResponseList',
+    gpaRes.status
+  );
+  const gpaScoreId = requireId(gpaList[0], 'scores/gpas item');
+
+  apply(gpaScoreId, languageTestScoreId, universityId, auth);
+  getCompetitors(auth);
+
+  sleep(1);
+}
diff --git a/config/secrets b/config/secrets
index f88a84c..40fe5a2 160000
--- a/config/secrets
+++ b/config/secrets
@@ -1 +1 @@
-Subproject commit f88a84cdab72136d294614fd1e2c855c4a026c43
+Subproject commit 40fe5a23122f5b5fe84c1ecbcec237318ba6358d
diff --git a/environment/load_test/main.tf b/environment/load_test/main.tf
index 995074f..98753c4 100644
--- a/environment/load_test/main.tf
+++ b/environment/load_test/main.tf
@@ -1 +1,215 @@
-# TODO:: 부하 테스트용 EC2 인스턴스 및 보안 그룹 리소스 정의 필요
+data "aws_instance" "prod_api" {
+ 
filter { + name = "tag:Name" + values = [var.prod_api_instance_name] + } + + filter { + name = "instance-state-name" + values = ["running"] + } +} + +data "aws_instance" "stage_api" { + filter { + name = "tag:Name" + values = [var.stage_api_instance_name] + } + + filter { + name = "instance-state-name" + values = ["running"] + } +} + +data "aws_subnet" "stage_api" { + id = data.aws_instance.stage_api.subnet_id +} + +data "aws_subnets" "target" { + filter { + name = "vpc-id" + values = [data.aws_subnet.stage_api.vpc_id] + } +} + +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +data "aws_db_instance" "prod" { + db_instance_identifier = var.prod_rds_identifier +} + +data "aws_db_snapshot" "latest_prod" { + db_instance_identifier = var.prod_rds_identifier + most_recent = true + snapshot_type = "automated" +} + +data "aws_ssm_parameter" "prod_db_username" { + name = var.prod_db_username_parameter_name +} + +data "aws_ssm_parameter" "prod_db_password" { + name = var.prod_db_password_parameter_name + with_decryption = true +} + +locals { + source_security_group_ids = setunion( + data.aws_instance.prod_api.vpc_security_group_ids, + data.aws_instance.stage_api.vpc_security_group_ids + ) +} + +resource "aws_security_group" "load_test_db" { + name = "sc-load-test-db-sg" + description = "Security group for load test RDS" + vpc_id = data.aws_subnet.stage_api.vpc_id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = { + Name = "solid-connection-load-test-db-sg" + } +} + +resource "aws_security_group_rule" "load_test_db_mysql" { + for_each = local.source_security_group_ids + + type = "ingress" + description = "MySQL from prod/stage API server" + from_port = 3306 + to_port = 3306 + protocol = "tcp" + security_group_id = 
aws_security_group.load_test_db.id + source_security_group_id = each.value +} + +resource "aws_security_group" "load_generator" { + count = var.create_load_generator ? 1 : 0 + + name = "sc-load-test-generator-sg" + description = "Security group for k6 load generator" + vpc_id = data.aws_subnet.stage_api.vpc_id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = { + Name = "solid-connection-load-test-generator-sg" + } +} + +resource "aws_instance" "load_generator" { + count = var.create_load_generator ? 1 : 0 + + ami = data.aws_ami.ubuntu.id + instance_type = var.load_generator_instance_type + subnet_id = data.aws_instance.stage_api.subnet_id + vpc_security_group_ids = [aws_security_group.load_generator[0].id] + associate_public_ip_address = true + iam_instance_profile = var.load_generator_instance_profile_name + + metadata_options { + http_endpoint = "enabled" + http_tokens = "required" + http_put_response_hop_limit = 1 + } + + root_block_device { + volume_size = var.load_generator_root_volume_size + volume_type = "gp3" + encrypted = true + delete_on_termination = true + } + + user_data = <<-EOF + #!/bin/bash + set -eux + export DEBIAN_FRONTEND=noninteractive + apt-get update + apt-get install -y curl jq + snap install amazon-ssm-agent --classic || true + systemctl enable snap.amazon-ssm-agent.amazon-ssm-agent.service || true + systemctl restart snap.amazon-ssm-agent.amazon-ssm-agent.service || true + EOF + + tags = { + Name = "solid-connection-load-test-generator" + } +} + +resource "aws_db_subnet_group" "load_test" { + name = "sc-load-test-db-subnet-group" + subnet_ids = data.aws_subnets.target.ids + + tags = { + Name = "solid-connection-load-test-db-subnet-group" + } +} + +resource "aws_db_instance" "load_test" { + identifier = var.rds_identifier + instance_class = var.db_instance_class + parameter_group_name = var.db_parameter_group_name + snapshot_identifier = data.aws_db_snapshot.latest_prod.id + 
db_subnet_group_name = aws_db_subnet_group.load_test.name + vpc_security_group_ids = [aws_security_group.load_test_db.id] + publicly_accessible = false + skip_final_snapshot = true + copy_tags_to_snapshot = true + deletion_protection = false + backup_retention_period = 0 + apply_immediately = true + storage_encrypted = true + kms_key_id = var.kms_key_arn + + tags = { + Name = var.rds_identifier + } +} + +resource "aws_ssm_parameter" "load_test_datasource_url" { + name = "${var.load_test_parameter_prefix}/spring.datasource.url" + type = "String" + value = "jdbc:mysql://${aws_db_instance.load_test.address}:${aws_db_instance.load_test.port}/${var.db_name}?serverTimezone=Asia/Seoul&characterEncoding=UTF-8" + overwrite = true +} + +resource "aws_ssm_parameter" "load_test_datasource_username" { + name = "${var.load_test_parameter_prefix}/spring.datasource.username" + type = "String" + value = data.aws_ssm_parameter.prod_db_username.value + overwrite = true +} + +resource "aws_ssm_parameter" "load_test_datasource_password" { + name = "${var.load_test_parameter_prefix}/spring.datasource.password" + type = "SecureString" + value = data.aws_ssm_parameter.prod_db_password.value + key_id = var.ssm_kms_key_id + overwrite = true + tier = "Standard" +} diff --git a/environment/load_test/output.tf b/environment/load_test/output.tf new file mode 100644 index 0000000..36e2db7 --- /dev/null +++ b/environment/load_test/output.tf @@ -0,0 +1,84 @@ +output "load_test_rds_endpoint" { + description = "Load test RDS endpoint" + value = aws_db_instance.load_test.address +} + +output "load_test_rds_port" { + description = "Load test RDS port" + value = aws_db_instance.load_test.port +} + +output "load_test_rds_identifier" { + description = "Load test RDS identifier" + value = aws_db_instance.load_test.identifier +} + +output "load_test_db_name" { + description = "Load test database name" + value = var.db_name +} + +output "prod_rds_endpoint" { + description = "Prod RDS endpoint used as dump 
source" + value = data.aws_db_instance.prod.address +} + +output "prod_rds_port" { + description = "Prod RDS port" + value = data.aws_db_instance.prod.port +} + +output "prod_api_instance_id" { + description = "Prod API EC2 instance ID whose security group can access load-test RDS" + value = data.aws_instance.prod_api.id +} + +output "stage_api_instance_id" { + description = "Stage API EC2 instance ID" + value = data.aws_instance.stage_api.id +} + +output "stage_api_public_ip" { + description = "Stage API EC2 public IP" + value = data.aws_instance.stage_api.public_ip +} + +output "load_test_ssm_parameter_prefix" { + description = "SSM Parameter Store prefix for load test datasource values" + value = var.load_test_parameter_prefix +} + +output "prod_db_username_parameter_name" { + description = "SSM parameter name containing the prod DB username" + value = var.prod_db_username_parameter_name +} + +output "prod_db_password_parameter_name" { + description = "SSM SecureString parameter name containing the prod DB password" + value = var.prod_db_password_parameter_name +} + +output "load_generator_instance_id" { + description = "k6 load generator EC2 instance ID" + value = try(aws_instance.load_generator[0].id, "") +} + +output "load_generator_private_ip" { + description = "k6 load generator private IP" + value = try(aws_instance.load_generator[0].private_ip, "") +} + +output "load_generator_k6_dir" { + description = "Directory where k6 files are placed on the load generator" + value = var.load_generator_k6_dir +} + +output "load_test_target_base_url" { + description = "Default target base URL for k6" + value = var.load_test_target_base_url +} + +output "k6_prometheus_remote_write_url" { + description = "Default Prometheus remote-write URL for k6" + value = var.k6_prometheus_remote_write_url +} diff --git a/environment/load_test/provider.tf b/environment/load_test/provider.tf index 3c3f8d1..8b41b3d 100644 --- a/environment/load_test/provider.tf +++ 
b/environment/load_test/provider.tf @@ -1,3 +1,22 @@ +terraform { + required_version = ">= 1.10.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.0" + } + } + + backend "s3" { + bucket = "solid-connection-tfstate" + key = "env/load_test/terraform.tfstate" + region = "ap-northeast-2" + use_lockfile = true + encrypt = true + } +} + provider "aws" { region = "ap-northeast-2" default_tags { diff --git a/environment/load_test/variables.tf b/environment/load_test/variables.tf index 6f74e1f..2710ffe 100644 --- a/environment/load_test/variables.tf +++ b/environment/load_test/variables.tf @@ -1 +1,136 @@ -# TODO:: 부하 테스트 인스턴스용 변수 정의 +variable "rds_identifier" { + description = "RDS identifier for load test" + type = string +} + +variable "db_instance_class" { + description = "RDS instance class for load test" + type = string +} + +variable "allocated_storage" { + description = "RDS storage in GiB" + type = number + default = 20 +} + +variable "db_engine_version" { + description = "Deprecated. The load-test RDS is restored from the latest prod snapshot." + type = string + default = null + nullable = true +} + +variable "db_parameter_group_name" { + description = "MySQL parameter group name" + type = string +} + +variable "db_name" { + description = "Application database name" + type = string + default = "solid_connection" +} + +variable "load_test_db_username_parameter_name" { + description = "Deprecated compatibility input. Load-test datasource credentials are copied from prod datasource parameters." + type = string + default = null + nullable = true +} + +variable "load_test_db_password_parameter_name" { + description = "Deprecated compatibility input. Load-test datasource credentials are copied from prod datasource parameters." 
+ type = string + default = null + nullable = true +} + +variable "prod_db_username_parameter_name" { + description = "SSM parameter name containing the prod DB username" + type = string + default = "/solid-connection/prod/spring.datasource.username" +} + +variable "prod_db_password_parameter_name" { + description = "SSM SecureString parameter name containing the prod DB password" + type = string + default = "/solid-connection/prod/spring.datasource.password" +} + +variable "kms_key_arn" { + description = "KMS key ARN for RDS storage encryption" + type = string +} + +variable "ssm_kms_key_id" { + description = "KMS key ID or ARN for SSM SecureString. Null uses the AWS managed aws/ssm key." + type = string + default = null + nullable = true +} + +variable "prod_rds_identifier" { + description = "Source prod RDS identifier" + type = string +} + +variable "prod_api_instance_name" { + description = "Name tag of the prod API EC2 instance whose security group can access load-test RDS" + type = string + default = "solid-connection-server-prod" +} + +variable "stage_api_instance_name" { + description = "Name tag of the stage API EC2 instance that will connect to load test RDS" + type = string + default = "solid-connection-server-stage" +} + +variable "load_test_parameter_prefix" { + description = "SSM Parameter Store prefix for load test datasource values" + type = string + default = "/solid-connection/loadtest" +} + +variable "load_generator_instance_type" { + description = "EC2 instance type for the k6 load generator" + type = string + default = "c7i.xlarge" +} + +variable "create_load_generator" { + description = "Whether to create the k6 load generator EC2 instance" + type = bool + default = true +} + +variable "load_generator_instance_profile_name" { + description = "Existing IAM instance profile name for the k6 load generator. It must allow SSM RunCommand." 
+ type = string + default = "solid-connection-load-test-generator" +} + +variable "load_generator_root_volume_size" { + description = "Root volume size in GiB for the k6 load generator" + type = number + default = 20 +} + +variable "load_generator_k6_dir" { + description = "Directory where k6 files are placed on the load generator" + type = string + default = "/home/ubuntu/solid-connection-load-test/k6" +} + +variable "load_test_target_base_url" { + description = "Default target base URL for k6" + type = string + default = "https://api.stage.solid-connection.com" +} + +variable "k6_prometheus_remote_write_url" { + description = "Default Prometheus remote-write URL for k6. Empty disables remote-write unless the workflow input overrides it." + type = string + default = "" +} diff --git a/environment/stage/main.tf b/environment/stage/main.tf index 3f3e129..a0b0b44 100644 --- a/environment/stage/main.tf +++ b/environment/stage/main.tf @@ -6,8 +6,8 @@ data "aws_vpc" "default" { module "stage_stack" { source = "../../modules/app_stack" - env_name = "stage" - vpc_id = data.aws_vpc.default.id + env_name = "stage" + vpc_id = data.aws_vpc.default.id ami_id = var.ami_id @@ -15,13 +15,13 @@ module "stage_stack" { ec2_iam_instance_profile = var.ec2_iam_instance_profile # 키페어 및 접속 허용 - key_name = var.key_name + key_name = var.key_name # 인스턴스 스펙 - instance_type = var.server_instance_type + instance_type = var.server_instance_type # RDS 미사용 (Docker container로 대체) - enable_rds = false + enable_rds = false # 보안 그룹 규칙 api_ingress_rules = var.api_ingress_rules diff --git a/modules/app_stack/ec2.tf b/modules/app_stack/ec2.tf index b49aa52..6734a28 100644 --- a/modules/app_stack/ec2.tf +++ b/modules/app_stack/ec2.tf @@ -21,6 +21,7 @@ data "cloudinit_config" "app_init" { content = file("${path.module}/../common/scripts/docker_setup.sh") filename = "1_docker_install.sh" } + } # API Server (EC2) @@ -100,9 +101,9 @@ resource "null_resource" "update_side_infra" { triggers = { script_hash = 
sha256(templatefile("${path.module}/scripts/side_infra_setup.sh.tftpl", { - work_dir = var.work_dir - alloy_env_name = var.alloy_env_name - alloy_config_content = templatefile("${path.module}/../../config/side-infra/config.alloy.tftpl", { + work_dir = var.work_dir + alloy_env_name = var.alloy_env_name + alloy_config_content = templatefile("${path.module}/../../config/side-infra/config.alloy.tftpl", { loki_ip = data.aws_instance.monitoring_server.private_ip }) redis_version = var.redis_version @@ -120,9 +121,9 @@ resource "null_resource" "update_side_infra" { provisioner "file" { content = templatefile("${path.module}/scripts/side_infra_setup.sh.tftpl", { - work_dir = var.work_dir - alloy_env_name = var.alloy_env_name - alloy_config_content = templatefile("${path.module}/../../config/side-infra/config.alloy.tftpl", { + work_dir = var.work_dir + alloy_env_name = var.alloy_env_name + alloy_config_content = templatefile("${path.module}/../../config/side-infra/config.alloy.tftpl", { loki_ip = data.aws_instance.monitoring_server.private_ip }) redis_version = var.redis_version diff --git a/modules/app_stack/variables.tf b/modules/app_stack/variables.tf index 33f8b1a..1c5028d 100644 --- a/modules/app_stack/variables.tf +++ b/modules/app_stack/variables.tf @@ -67,7 +67,7 @@ variable "additional_db_users" { database = string privileges = list(string) })) - default = {} + default = {} } variable "db_engine_version" { diff --git a/scripts/load_test/README.md b/scripts/load_test/README.md new file mode 100644 index 0000000..3027b9b --- /dev/null +++ b/scripts/load_test/README.md @@ -0,0 +1,132 @@ +# 부하 테스트 자동화 + +이 디렉터리는 부하 테스트용 GitHub Actions workflow에서 사용하는 스크립트를 담고 있습니다. + +전체 흐름은 다음과 같습니다. + +1. **Load Test Start**: 임시 부하 테스트 인프라를 만들고 stage를 준비합니다. +2. **Load Test Run**: k6 부하 생성 EC2를 만들고 k6를 실행한 뒤 기본적으로 제거합니다. +3. **Load Test Stop**: stage를 복구하고 임시 부하 테스트 스택을 제거합니다. + +## 규칙 + +- 환경 Terraform에 대해 로컬에서 `terraform apply` 또는 `terraform destroy`를 실행하지 않습니다. 
+- 시작, 실행, 종료는 GitHub Actions에서 수행합니다. +- k6는 stage EC2에서 실행하지 않습니다. Run workflow가 생성한 별도 load-generator EC2에서 실행합니다. +- load-generator EC2는 비용 절감을 위해 기본적으로 Run workflow 종료 시 제거합니다. +- SSH private key를 사용하지 않습니다. EC2 명령은 SSM RunCommand로 실행합니다. + +## 필요한 설정 + +`environment/load_test`는 `config/secrets/load_test.tfvars`를 사용합니다. + +스냅샷 복원 방식에서는 load-test DB root 계정을 별도로 만들지 않습니다. load-test datasource username/password는 prod datasource Parameter Store 값을 복사해 사용합니다. + +주요 확인값: + +- `prod_rds_identifier`: snapshot을 조회할 prod RDS identifier +- `kms_key_arn`: 복원된 load-test RDS storage encryption에 사용할 KMS key ARN +- `prod_db_username_parameter_name`: 기본값 `/solid-connection/prod/spring.datasource.username` +- `prod_db_password_parameter_name`: 기본값 `/solid-connection/prod/spring.datasource.password` + +그 외 부하 테스트 설정값은 Terraform 기본값, GitHub Actions variable, workflow 입력값으로 처리합니다. + +## Load Test Start + +GitHub에서 **Actions > Load Test Start**를 수동 실행합니다. + +입력값: + +- `switch_stage_to_loadtest`: `true` 또는 `false` + - `true`이면 데이터 준비 후 stage 앱을 `dev,loadtest` profile로 재기동합니다. + +Start workflow 동작: + +1. GitHub Actions가 `environment/load_test`에서 Terraform apply를 실행합니다. +2. Terraform이 최신 prod RDS 자동 snapshot을 조회합니다. +3. Terraform이 해당 snapshot에서 load-test RDS를 복원합니다. +4. Terraform이 load-test datasource 값을 Parameter Store에 기록합니다. + - datasource URL은 복원된 load-test RDS endpoint를 사용합니다. + - datasource username/password는 prod datasource Parameter Store 값을 사용합니다. +5. `scripts/load_test/start.sh`가 Terraform output에서 필요한 값을 읽습니다. +6. `switch_stage_to_loadtest=true`이면 stage 앱을 `dev,loadtest` profile로 재기동합니다. + +Start workflow는 load-generator EC2를 만들지 않습니다. 부하 생성용 EC2는 비용 누수를 막기 위해 Run workflow에서만 생성합니다. + +## Load Test Run + +GitHub에서 **Actions > Load Test Run**을 수동 실행합니다. + +입력값: + +- `vus`: k6 virtual user 수입니다. 예: `10` +- `iterations`: VU당 반복 횟수입니다. 예: `10` +- `max_duration`: 최대 실행 시간입니다. 예: `30s`, `5m`, `15m`, `1h` +- `target_base_url` + - 선택값입니다. 
비워두면 Terraform output `load_test_target_base_url`을 사용합니다. +- `prometheus_remote_write_url` + - 선택값입니다. 비워두면 Terraform output `k6_prometheus_remote_write_url`을 사용합니다. + - Terraform output도 비어 있으면 Prometheus remote-write 전송은 비활성화됩니다. +- `destroy_runner`: `true` 또는 `false` + - 기본값은 `true`입니다. + - `true`이면 k6 실행이 끝난 뒤 load-generator EC2를 제거합니다. + - `false`이면 디버깅이나 재실행을 위해 load-generator EC2를 남깁니다. +- `rebuild_k6`: `true` 또는 `false` + - 기본값은 `false`입니다. + - `true`이면 실행 전 기존 k6 binary를 지우고 `set_up_xk6.sh`로 다시 빌드합니다. + +Run workflow 동작: + +1. `scripts/load_test/run_k6.sh`가 Terraform target apply로 load-generator EC2와 보안 그룹을 생성합니다. +2. Terraform output에서 load-generator EC2 ID와 k6 기본값을 읽습니다. +3. load-generator EC2의 SSM agent가 online 상태가 될 때까지 기다립니다. +4. SSM RunCommand로 k6 파일을 load-generator EC2에 동기화합니다. +5. 이전 실행에서 남아 있을 수 있는 k6 프로세스를 정리합니다. +6. k6 binary가 없거나 `rebuild_k6=true`이면 `set_up_xk6.sh`로 Prometheus remote-write 지원이 포함된 k6를 빌드합니다. +7. load-generator EC2에서 `whole-user-flow.js`를 실행합니다. +8. `destroy_runner=true`이면 실행 성공/실패와 관계없이 load-generator EC2와 보안 그룹을 제거합니다. + +`destroy_runner=false`로 runner를 남긴 뒤 다시 Run workflow를 실행해도 됩니다. 이 경우 기존 EC2를 재사용하며, k6 파일은 매번 다시 동기화됩니다. + +동기화되는 k6 파일: + +- `createPost.json` +- `updatePost.json` +- `whole-user-flow.js` +- `set_up_xk6.sh` + +## 결과 확인 + +간단한 실행 결과는 **Load Test Run** GitHub Actions 로그에서 확인합니다. + +k6 스크립트는 remote-write URL이 설정된 경우 Prometheus remote-write로도 지표를 전송합니다. + +- 기본 remote-write URL은 Terraform output `k6_prometheus_remote_write_url`을 사용합니다. +- Terraform 기본값은 비어 있으므로, 전송이 필요하면 Terraform 변수나 workflow 입력값 `prometheus_remote_write_url`로 URL을 넣습니다. +- k6 지표에는 요청 수, 실패율, 응답 시간, p90, p95, p99, 평균, 최소, 최대값이 포함됩니다. +- API 호출에는 `name`, `testid`, `time` tag가 붙어 endpoint와 실행 시점별로 필터링할 수 있습니다. + +## Load Test Stop + +GitHub에서 **Actions > Load Test Stop**을 수동 실행합니다. + +입력값: + +- `restore_stage_dev`: `true` 또는 `false` + - `true`이면 stage 앱을 기존 dev compose 구성으로 되돌립니다. 
# Print command-line help for run_k6.sh on stdout.
#
# The heredoc delimiter is quoted, so the text is emitted verbatim with no
# parameter expansion. Every option that consumes a value names that value,
# and the timeout line mentions the SSM_COMMAND_TIMEOUT_SECONDS environment
# variable that seeds the default.
usage() {
  cat <<'EOF'
Usage: scripts/load_test/run_k6.sh [options]

Options:
  --terraform-dir PATH                   Default: environment/load_test
  --var-file PATH                        Default: ../../config/secrets/load_test.tfvars
  --local-k6-dir PATH                    Default: config/load-test/k6
  --script FILE                          Default: whole-user-flow.js
  --target-base-url URL                  Default: Terraform output load_test_target_base_url
  --prometheus-remote-write-url URL      Default: Terraform output k6_prometheus_remote_write_url
  --vus VALUE                            Default: 10
  --iterations VALUE                     Default: 10
  --max-duration VALUE                   Default: 15m
  --ssm-command-timeout-seconds SECONDS  Default: $SSM_COMMAND_TIMEOUT_SECONDS, else 3600
  --skip-runner-destroy                  Keep the k6 load generator after the run
  --rebuild-k6                           Rebuild the k6 binary before running
  -h, --help
EOF
}
"$1" in + --terraform-dir) TERRAFORM_DIR="$2"; shift 2 ;; + --var-file) VAR_FILE="$2"; shift 2 ;; + --local-k6-dir) LOCAL_K6_DIR="$2"; shift 2 ;; + --script) K6_SCRIPT="$2"; shift 2 ;; + --target-base-url) TARGET_BASE_URL="$2"; shift 2 ;; + --prometheus-remote-write-url) PROMETHEUS_REMOTE_WRITE_URL="$2"; shift 2 ;; + --vus) K6_VUS="$2"; shift 2 ;; + --iterations) K6_ITERATIONS="$2"; shift 2 ;; + --max-duration) K6_MAX_DURATION="$2"; shift 2 ;; + --ssm-command-timeout-seconds) SSM_COMMAND_TIMEOUT_SECONDS="$2"; shift 2 ;; + --skip-runner-destroy) DESTROY_RUNNER="false"; shift ;; + --rebuild-k6) REBUILD_K6="true"; shift ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown option: $1" >&2; usage; exit 1 ;; + esac +done + +require_command() { + if ! command -v "$1" >/dev/null 2>&1; then + echo "Required command not found: $1" >&2 + exit 1 + fi +} + +require_command terraform +require_command aws +require_command jq +require_command base64 + +tf_output() { + terraform -chdir="$TERRAFORM_DIR" output -raw "$1" +} + +runner_targets=( + -target=aws_security_group.load_generator + -target=aws_instance.load_generator +) + +destroy_runner() { + local exit_code="$?" + local cleanup_code=0 + + if [[ "$DESTROY_RUNNER" == "true" ]]; then + terraform -chdir="$TERRAFORM_DIR" destroy -auto-approve -var-file="$VAR_FILE" "${runner_targets[@]}" || cleanup_code="$?" 
# Run a shell script on one EC2 instance through SSM RunCommand
# (AWS-RunShellScript) and block until the invocation reaches a terminal state.
#
# Arguments:
#   $1  instance_id    EC2 instance ID that receives the command.
#   $2  comment        Label stored on the command and echoed in log messages.
#   $3  commands_json  JSON object passed as --parameters, e.g. {"commands": [...]}.
# Globals:
#   SSM_COMMAND_TIMEOUT_SECONDS  (read) seconds to wait before giving up.
# Outputs:
#   Success  -> the invocation's StandardOutputContent on stdout.
#   Failure  -> the full invocation JSON on stdout and a message on stderr.
# Exit status:
#   Returns 0 on Success; exits the script with 1 on any other terminal status
#   or when the wait exceeds SSM_COMMAND_TIMEOUT_SECONDS.
send_ssm_command() {
  local instance_id="$1"
  local comment="$2"
  local commands_json="$3"

  # AWS-RunShellScript stops the script after its own executionTimeout, which
  # defaults to 3600 seconds. Align it with the client-side wait (unless the
  # caller already set one) so raising the timeout really allows longer runs.
  commands_json="$(jq -c --arg timeout "$SSM_COMMAND_TIMEOUT_SECONDS" \
    'if has("executionTimeout") then . else . + {executionTimeout: [$timeout]} end' \
    <<<"$commands_json")"

  local command_id
  command_id="$(aws ssm send-command \
    --instance-ids "$instance_id" \
    --document-name "AWS-RunShellScript" \
    --comment "$comment" \
    --parameters "$commands_json" \
    --query "Command.CommandId" \
    --output text)"

  local started_at
  started_at="$(date +%s)"

  local status
  while true; do
    sleep 5

    # A lookup error (for example the invocation is not registered yet) is
    # swallowed and yields an empty status, which is treated as "still pending".
    status="$(aws ssm get-command-invocation \
      --command-id "$command_id" \
      --instance-id "$instance_id" \
      --query "Status" \
      --output text 2>/dev/null || true)"

    # Evaluate terminal states before the deadline so a command that finished
    # during the last sleep is not misreported as a timeout.
    case "$status" in
      Success)
        aws ssm get-command-invocation \
          --command-id "$command_id" \
          --instance-id "$instance_id" \
          --query "StandardOutputContent" \
          --output text || true
        return 0
        ;;
      Pending|InProgress|Delayed|"")
        ;;
      *)
        aws ssm get-command-invocation \
          --command-id "$command_id" \
          --instance-id "$instance_id" \
          --output json || true
        echo "SSM command failed with status $status: $comment" >&2
        exit 1
        ;;
    esac

    if (( $(date +%s) - started_at > SSM_COMMAND_TIMEOUT_SECONDS )); then
      aws ssm get-command-invocation \
        --command-id "$command_id" \
        --instance-id "$instance_id" \
        --output json || true
      # Best effort: do not leave the script running on the instance after we
      # stop waiting for it.
      aws ssm cancel-command \
        --command-id "$command_id" \
        --instance-ids "$instance_id" >/dev/null 2>&1 || true
      echo "SSM command timed out after ${SSM_COMMAND_TIMEOUT_SECONDS}s: $comment" >&2
      exit 1
    fi
  done
}
# ---------------------------------------------------------------------------
# Main flow: provision the load generator, push the k6 files, run the test.
# ---------------------------------------------------------------------------

terraform -chdir="$TERRAFORM_DIR" init

# Install the cleanup trap BEFORE apply: if apply fails half-way (for example
# the security group exists but the instance does not), destroy_runner still
# runs and removes whatever was created.
trap destroy_runner EXIT

terraform -chdir="$TERRAFORM_DIR" apply -auto-approve -var-file="$VAR_FILE" "${runner_targets[@]}"

load_generator_instance_id="$(tf_output load_generator_instance_id)"
load_generator_k6_dir="$(tf_output load_generator_k6_dir)"
tf_target_base_url="$(tf_output load_test_target_base_url)"
tf_prometheus_remote_write_url="$(tf_output k6_prometheus_remote_write_url)"

# Command-line values win; empty ones fall back to the Terraform outputs.
TARGET_BASE_URL="${TARGET_BASE_URL:-$tf_target_base_url}"
PROMETHEUS_REMOTE_WRITE_URL="${PROMETHEUS_REMOTE_WRITE_URL:-$tf_prometheus_remote_write_url}"

wait_for_ssm "$load_generator_instance_id"

# Files copied to the load generator on every run. A script selected with
# --script that is not one of the defaults is added, otherwise k6 would be
# asked to run a file that was never uploaded.
k6_files=(
  "createPost.json"
  "updatePost.json"
  "whole-user-flow.js"
  "set_up_xk6.sh"
)

script_already_listed="false"
for relative_path in "${k6_files[@]}"; do
  if [[ "$relative_path" == "$K6_SCRIPT" ]]; then
    script_already_listed="true"
  fi
done
if [[ "$script_already_listed" != "true" ]]; then
  k6_files+=("$K6_SCRIPT")
fi

for relative_path in "${k6_files[@]}"; do
  sync_file "$load_generator_instance_id" "$load_generator_k6_dir" "$relative_path"
done

# The Prometheus remote-write output is enabled only when a URL is configured.
# The flag is computed here and passed to jq as a plain argument, so the jq
# filter needs no string literals inside a \( ... ) interpolation.
k6_output_flag=""
if [[ -n "$PROMETHEUS_REMOTE_WRITE_URL" ]]; then
  k6_output_flag="-o experimental-prometheus-rw "
fi

# Every interpolated value is shell-quoted with @sh because these command
# lines run on the load generator through SSM, and `chown -R` runs without sudo.
# NOTE(review): `set -o pipefail` needs a shell that supports it; confirm which
# shell AWS-RunShellScript uses on the load generator AMI.
run_commands_json="$(jq -cn \
  --arg k6_dir "$load_generator_k6_dir" \
  --arg script "$K6_SCRIPT" \
  --arg target_base_url "$TARGET_BASE_URL" \
  --arg prometheus_url "$PROMETHEUS_REMOTE_WRITE_URL" \
  --arg output_flag "$k6_output_flag" \
  --arg vus "$K6_VUS" \
  --arg iterations "$K6_ITERATIONS" \
  --arg max_duration "$K6_MAX_DURATION" \
  --arg rebuild_k6 "$REBUILD_K6" \
  '{
    commands: [
      "set -euo pipefail",
      "cd \($k6_dir | @sh)",
      "pkill -f '\''(^|/)k6( |$)'\'' || true",
      "chmod +x set_up_xk6.sh",
      "chown -R ubuntu:ubuntu \($k6_dir | @sh)",
      "if [ \($rebuild_k6 | @sh) = '\''true'\'' ]; then rm -f ./k6; fi",
      "if [ ! -x ./k6 ]; then sudo -u ubuntu -H ./set_up_xk6.sh; fi",
      "sudo -u ubuntu -H env BASE_URL=\($target_base_url | @sh) K6_PROMETHEUS_RW_SERVER_URL=\($prometheus_url | @sh) K6_PROMETHEUS_RW_TREND_STATS=\"p(90),p(95),p(99),avg,min,max\" K6_VUS=\($vus | @sh) K6_ITERATIONS=\($iterations | @sh) K6_MAX_DURATION=\($max_duration | @sh) ./k6 run \($output_flag)\($script | @sh)"
    ]
  }')"

send_ssm_command "$load_generator_instance_id" "Run k6 load test" "$run_commands_json"
# Run a shell script on one EC2 instance through SSM RunCommand
# (AWS-RunShellScript) and poll every 5 seconds until the invocation reaches a
# terminal status.
#
# Arguments:
#   $1  instance_id    EC2 instance ID that receives the command.
#   $2  comment        Label stored on the command and echoed in log messages.
#   $3  commands_json  JSON object passed as --parameters, e.g. {"commands": [...]}.
# Globals:
#   SSM_COMMAND_TIMEOUT_SECONDS  (read) seconds to wait before giving up.
# Exit status:
#   Returns 0 on Success. On any other terminal status, or when the wait
#   exceeds the timeout, prints the invocation JSON plus a message on stderr
#   and exits the script with 1.
send_ssm_command() {
  local instance_id="$1"
  local comment="$2"
  local commands_json="$3"

  local command_id
  command_id="$(aws ssm send-command \
    --instance-ids "$instance_id" \
    --document-name "AWS-RunShellScript" \
    --comment "$comment" \
    --parameters "$commands_json" \
    --query "Command.CommandId" \
    --output text)"

  local started_at
  started_at="$(date +%s)"

  local status
  while true; do
    sleep 5

    # Lookup errors are swallowed and produce an empty status, which is
    # handled below as "still pending".
    status="$(aws ssm get-command-invocation \
      --command-id "$command_id" \
      --instance-id "$instance_id" \
      --query "Status" \
      --output text 2>/dev/null || true)"

    # Terminal states are evaluated before the deadline so that a command
    # which completed during the last sleep is not misreported as a timeout.
    case "$status" in
      Success)
        return 0
        ;;
      Pending|InProgress|Delayed|"")
        ;;
      *)
        aws ssm get-command-invocation \
          --command-id "$command_id" \
          --instance-id "$instance_id" \
          --output json || true
        echo "SSM command failed with status $status: $comment" >&2
        exit 1
        ;;
    esac

    if (( $(date +%s) - started_at > SSM_COMMAND_TIMEOUT_SECONDS )); then
      aws ssm get-command-invocation \
        --command-id "$command_id" \
        --instance-id "$instance_id" \
        --output json || true
      echo "SSM command timed out after ${SSM_COMMAND_TIMEOUT_SECONDS}s: $comment" >&2
      exit 1
    fi
  done
}
+echo "RDS endpoint: ${loadtest_endpoint}:${loadtest_port}" +echo "Load generator instance: created by Load Test Run" +echo "Stage instance: ${stage_instance_id}" +echo "Stage public IP: ${stage_public_ip}" diff --git a/scripts/load_test/stop.sh b/scripts/load_test/stop.sh new file mode 100644 index 0000000..005a61c --- /dev/null +++ b/scripts/load_test/stop.sh @@ -0,0 +1,135 @@ +#!/usr/bin/env bash +set -euo pipefail + +TERRAFORM_DIR="environment/load_test" +VAR_FILE="../../config/secrets/load_test.tfvars" +RESTORE_STAGE_DEV="false" +STAGE_APP_DIR="/home/ubuntu/solid-connection-dev" +STAGE_COMPOSE_FILE="docker-compose.dev.yml" +SSM_COMMAND_TIMEOUT_SECONDS="${SSM_COMMAND_TIMEOUT_SECONDS:-900}" +SKIP_TERRAFORM_DESTROY="false" + +usage() { + cat <<'EOF' +Usage: scripts/load_test/stop.sh [options] + +Options: + --terraform-dir PATH Default: environment/load_test + --var-file PATH Default: ../../config/secrets/load_test.tfvars + --restore-stage-dev Restart stage app through SSM with dev profile + --stage-app-dir PATH Default: /home/ubuntu/solid-connection-dev + --stage-compose-file VALUE Default: docker-compose.dev.yml + --ssm-command-timeout-seconds Default: 900 + --skip-terraform-destroy + -h, --help +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --terraform-dir) TERRAFORM_DIR="$2"; shift 2 ;; + --var-file) VAR_FILE="$2"; shift 2 ;; + --restore-stage-dev) RESTORE_STAGE_DEV="true"; shift ;; + --stage-app-dir) STAGE_APP_DIR="$2"; shift 2 ;; + --stage-compose-file) STAGE_COMPOSE_FILE="$2"; shift 2 ;; + --ssm-command-timeout-seconds) SSM_COMMAND_TIMEOUT_SECONDS="$2"; shift 2 ;; + --skip-terraform-destroy) SKIP_TERRAFORM_DESTROY="true"; shift ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown option: $1" >&2; usage; exit 1 ;; + esac +done + +require_command() { + if ! 
# Run a shell script on one EC2 instance through SSM RunCommand
# (AWS-RunShellScript) and poll every 5 seconds until the invocation reaches a
# terminal status.
#
# Arguments:
#   $1  instance_id    EC2 instance ID that receives the command.
#   $2  comment        Label stored on the command and echoed in log messages.
#   $3  commands_json  JSON object passed as --parameters, e.g. {"commands": [...]}.
# Globals:
#   SSM_COMMAND_TIMEOUT_SECONDS  (read) seconds to wait before giving up.
# Exit status:
#   Returns 0 on Success. On any other terminal status, or when the wait
#   exceeds the timeout, prints the invocation JSON plus a message on stderr
#   and exits the script with 1.
send_ssm_command() {
  local instance_id="$1"
  local comment="$2"
  local commands_json="$3"

  local command_id
  command_id="$(aws ssm send-command \
    --instance-ids "$instance_id" \
    --document-name "AWS-RunShellScript" \
    --comment "$comment" \
    --parameters "$commands_json" \
    --query "Command.CommandId" \
    --output text)"

  local started_at
  started_at="$(date +%s)"

  local status
  while true; do
    sleep 5

    # Lookup errors are swallowed and produce an empty status, which is
    # handled below as "still pending".
    status="$(aws ssm get-command-invocation \
      --command-id "$command_id" \
      --instance-id "$instance_id" \
      --query "Status" \
      --output text 2>/dev/null || true)"

    # Check for a terminal state first: a restore that completed during the
    # last sleep must not be reported as a timeout.
    case "$status" in
      Success)
        return 0
        ;;
      Pending|InProgress|Delayed|"")
        ;;
      *)
        aws ssm get-command-invocation \
          --command-id "$command_id" \
          --instance-id "$instance_id" \
          --output json || true
        echo "SSM command failed with status $status: $comment" >&2
        exit 1
        ;;
    esac

    if (( $(date +%s) - started_at > SSM_COMMAND_TIMEOUT_SECONDS )); then
      aws ssm get-command-invocation \
        --command-id "$command_id" \
        --instance-id "$instance_id" \
        --output json || true
      echo "SSM command timed out after ${SSM_COMMAND_TIMEOUT_SECONDS}s: $comment" >&2
      exit 1
    fi
  done
}
exit 1; fi", + "OWNER_LOWERCASE=$(echo \"$CURRENT_IMAGE\" | sed -E '\''s#^ghcr.io/([^/]+)/.*#\\1#'\'')", + "IMAGE_TAG=$(echo \"$CURRENT_IMAGE\" | sed -E '\''s#.*:([^:]+)$#\\1#'\'')", + "rm -f docker-compose.loadtest.override.yml", + "docker compose -f \($compose_file) down || true", + "OWNER_LOWERCASE=\"$OWNER_LOWERCASE\" IMAGE_TAG=\"$IMAGE_TAG\" docker compose -f \($compose_file) up -d" + ] + }')" + + send_ssm_command "$stage_instance_id" "Restore stage app to dev datasource" "$stage_commands_json" +fi + +if [[ "$SKIP_TERRAFORM_DESTROY" != "true" ]]; then + terraform -chdir="$TERRAFORM_DIR" destroy -auto-approve -var-file="$VAR_FILE" +fi + +echo "Load test environment has been stopped."