• tests help individual developers sanity check their Terraform changes
  • tests can auto-approve run-of-the-mill infrastructure changes and reduce the burden of peer-review
  • tests can help catch problems that arise when applying Terraform to production after applying it to staging

In this tutorial, you’ll learn how to use OPA to implement unit tests for Terraform plans that create and delete auto-scaling groups and servers.

Prerequisites

This tutorial requires Terraform 0.12.6 and OPA.

(This tutorial should also work with the latest version of Terraform, but it is untested. Contributions welcome!)

Getting Started

Create a Terraform file (main.tf) that includes an auto-scaling group and a server on AWS. (You will need to modify the provider configuration to point to your AWS credentials.)

Then initialize Terraform and ask it to calculate what changes it will make and store the output in tfplan.binary.

  1. terraform init
  2. terraform plan --out tfplan.binary

2. Convert the Terraform plan into JSON

Use the terraform show command to convert the Terraform plan into JSON so that OPA can read the plan.

  1. terraform show -json tfplan.binary > tfplan.json

Here are the expected contents of tfplan.json.

  1. {
  2. "format_version": "0.1",
  3. "terraform_version": "0.12.6",
  4. "planned_values": {
  5. "root_module": {
  6. "resources": [
  7. {
  8. "address": "aws_autoscaling_group.my_asg",
  9. "mode": "managed",
  10. "type": "aws_autoscaling_group",
  11. "name": "my_asg",
  12. "provider_name": "aws",
  13. "schema_version": 0,
  14. "values": {
  15. "availability_zones": [
  16. "us-west-1a"
  17. ],
  18. "desired_capacity": 4,
  19. "enabled_metrics": null,
  20. "force_delete": true,
  21. "health_check_grace_period": 300,
  22. "health_check_type": "ELB",
  23. "initial_lifecycle_hook": [],
  24. "launch_configuration": "my_web_config",
  25. "launch_template": [],
  26. "max_size": 5,
  27. "metrics_granularity": "1Minute",
  28. "min_elb_capacity": null,
  29. "min_size": 1,
  30. "mixed_instances_policy": [],
  31. "name": "my_asg",
  32. "name_prefix": null,
  33. "placement_group": null,
  34. "protect_from_scale_in": false,
  35. "suspended_processes": null,
  36. "tag": [],
  37. "tags": null,
  38. "termination_policies": null,
  39. "timeouts": null,
  40. "wait_for_capacity_timeout": "10m",
  41. "wait_for_elb_capacity": null
  42. }
  43. },
  44. {
  45. "address": "aws_instance.web",
  46. "mode": "managed",
  47. "type": "aws_instance",
  48. "name": "web",
  49. "provider_name": "aws",
  50. "schema_version": 1,
  51. "values": {
  52. "ami": "ami-09b4b74c",
  53. "credit_specification": [],
  54. "disable_api_termination": null,
  55. "ebs_optimized": null,
  56. "get_password_data": false,
  57. "iam_instance_profile": null,
  58. "instance_initiated_shutdown_behavior": null,
  59. "instance_type": "t2.micro",
  60. "monitoring": null,
  61. "source_dest_check": true,
  62. "tags": null,
  63. "timeouts": null,
  64. "user_data": null,
  65. "user_data_base64": null
  66. }
  67. },
  68. {
  69. "address": "aws_launch_configuration.my_web_config",
  70. "mode": "managed",
  71. "type": "aws_launch_configuration",
  72. "name": "my_web_config",
  73. "provider_name": "aws",
  74. "schema_version": 0,
  75. "values": {
  76. "associate_public_ip_address": false,
  77. "enable_monitoring": true,
  78. "ephemeral_block_device": [],
  79. "iam_instance_profile": null,
  80. "image_id": "ami-09b4b74c",
  81. "instance_type": "t2.micro",
  82. "name": "my_web_config",
  83. "name_prefix": null,
  84. "placement_tenancy": null,
  85. "security_groups": null,
  86. "spot_price": null,
  87. "user_data": null,
  88. "user_data_base64": null,
  89. "vpc_classic_link_id": null,
  90. "vpc_classic_link_security_groups": null
  91. }
  92. }
  93. ]
  94. }
  95. },
  96. "resource_changes": [
  97. {
  98. "address": "aws_autoscaling_group.my_asg",
  99. "mode": "managed",
  100. "type": "aws_autoscaling_group",
  101. "name": "my_asg",
  102. "provider_name": "aws",
  103. "change": {
  104. "actions": [
  105. "create"
  106. ],
  107. "before": null,
  108. "after": {
  109. "availability_zones": [
  110. "us-west-1a"
  111. ],
  112. "desired_capacity": 4,
  113. "enabled_metrics": null,
  114. "force_delete": true,
  115. "health_check_grace_period": 300,
  116. "health_check_type": "ELB",
  117. "initial_lifecycle_hook": [],
  118. "launch_configuration": "my_web_config",
  119. "launch_template": [],
  120. "max_size": 5,
  121. "metrics_granularity": "1Minute",
  122. "min_elb_capacity": null,
  123. "min_size": 1,
  124. "mixed_instances_policy": [],
  125. "name": "my_asg",
  126. "name_prefix": null,
  127. "placement_group": null,
  128. "protect_from_scale_in": false,
  129. "suspended_processes": null,
  130. "tag": [],
  131. "tags": null,
  132. "termination_policies": null,
  133. "timeouts": null,
  134. "wait_for_capacity_timeout": "10m",
  135. "wait_for_elb_capacity": null
  136. },
  137. "after_unknown": {
  138. "arn": true,
  139. "availability_zones": [
  140. false
  141. ],
  142. "default_cooldown": true,
  143. "id": true,
  144. "initial_lifecycle_hook": [],
  145. "launch_template": [],
  146. "load_balancers": true,
  147. "mixed_instances_policy": [],
  148. "service_linked_role_arn": true,
  149. "tag": [],
  150. "target_group_arns": true,
  151. "vpc_zone_identifier": true
  152. }
  153. }
  154. },
  155. {
  156. "address": "aws_instance.web",
  157. "mode": "managed",
  158. "type": "aws_instance",
  159. "name": "web",
  160. "provider_name": "aws",
  161. "change": {
  162. "actions": [
  163. "create"
  164. ],
  165. "before": null,
  166. "after": {
  167. "ami": "ami-09b4b74c",
  168. "credit_specification": [],
  169. "disable_api_termination": null,
  170. "ebs_optimized": null,
  171. "get_password_data": false,
  172. "iam_instance_profile": null,
  173. "instance_initiated_shutdown_behavior": null,
  174. "instance_type": "t2.micro",
  175. "monitoring": null,
  176. "source_dest_check": true,
  177. "tags": null,
  178. "timeouts": null,
  179. "user_data": null,
  180. "user_data_base64": null
  181. },
  182. "after_unknown": {
  183. "arn": true,
  184. "associate_public_ip_address": true,
  185. "availability_zone": true,
  186. "cpu_core_count": true,
  187. "cpu_threads_per_core": true,
  188. "credit_specification": [],
  189. "ebs_block_device": true,
  190. "ephemeral_block_device": true,
  191. "host_id": true,
  192. "id": true,
  193. "instance_state": true,
  194. "ipv6_addresses": true,
  195. "key_name": true,
  196. "network_interface": true,
  197. "network_interface_id": true,
  198. "password_data": true,
  199. "placement_group": true,
  200. "primary_network_interface_id": true,
  201. "private_dns": true,
  202. "private_ip": true,
  203. "public_dns": true,
  204. "public_ip": true,
  205. "root_block_device": true,
  206. "subnet_id": true,
  207. "tenancy": true,
  208. "volume_tags": true,
  209. "vpc_security_group_ids": true
  210. }
  211. }
  212. },
  213. {
  214. "address": "aws_launch_configuration.my_web_config",
  215. "mode": "managed",
  216. "type": "aws_launch_configuration",
  217. "name": "my_web_config",
  218. "provider_name": "aws",
  219. "change": {
  220. "actions": [
  221. "create"
  222. ],
  223. "before": null,
  224. "after": {
  225. "associate_public_ip_address": false,
  226. "enable_monitoring": true,
  227. "ephemeral_block_device": [],
  228. "iam_instance_profile": null,
  229. "image_id": "ami-09b4b74c",
  230. "instance_type": "t2.micro",
  231. "name": "my_web_config",
  232. "name_prefix": null,
  233. "placement_tenancy": null,
  234. "security_groups": null,
  235. "spot_price": null,
  236. "user_data": null,
  237. "user_data_base64": null,
  238. "vpc_classic_link_id": null,
  239. "vpc_classic_link_security_groups": null
  240. },
  241. "after_unknown": {
  242. "ebs_block_device": true,
  243. "ebs_optimized": true,
  244. "ephemeral_block_device": [],
  245. "id": true,
  246. "key_name": true,
  247. "root_block_device": true
  248. }
  249. }
  250. }
  251. ],
  252. "configuration": {
  253. "provider_config": {
  254. "aws": {
  255. "name": "aws",
  256. "expressions": {
  257. "region": {
  258. "constant_value": "us-west-1"
  259. }
  260. }
  261. }
  262. },
  263. "root_module": {
  264. "resources": [
  265. {
  266. "address": "aws_autoscaling_group.my_asg",
  267. "mode": "managed",
  268. "type": "aws_autoscaling_group",
  269. "name": "my_asg",
  270. "provider_config_key": "aws",
  271. "expressions": {
  272. "availability_zones": {
  273. "constant_value": [
  274. "us-west-1a"
  275. ]
  276. },
  277. "desired_capacity": {
  278. "constant_value": 4
  279. },
  280. "force_delete": {
  281. "constant_value": true
  282. },
  283. "health_check_grace_period": {
  284. "constant_value": 300
  285. },
  286. "health_check_type": {
  287. "constant_value": "ELB"
  288. },
  289. "launch_configuration": {
  290. "constant_value": "my_web_config"
  291. },
  292. "max_size": {
  293. "constant_value": 5
  294. },
  295. "min_size": {
  296. "constant_value": 1
  297. },
  298. "name": {
  299. "constant_value": "my_asg"
  300. }
  301. },
  302. "schema_version": 0
  303. },
  304. {
  305. "address": "aws_instance.web",
  306. "mode": "managed",
  307. "type": "aws_instance",
  308. "name": "web",
  309. "provider_config_key": "aws",
  310. "expressions": {
  311. "ami": {
  312. "constant_value": "ami-09b4b74c"
  313. },
  314. "instance_type": {
  315. "constant_value": "t2.micro"
  316. }
  317. },
  318. "schema_version": 1
  319. },
  320. {
  321. "address": "aws_launch_configuration.my_web_config",
  322. "mode": "managed",
  323. "type": "aws_launch_configuration",
  324. "name": "my_web_config",
  325. "provider_config_key": "aws",
  326. "expressions": {
  327. "image_id": {
  328. "constant_value": "ami-09b4b74c"
  329. },
  330. "instance_type": {
  331. "constant_value": "t2.micro"
  332. },
  333. "name": {
  334. "constant_value": "my_web_config"
  335. }
  336. },
  337. "schema_version": 0
  338. }
  339. ]
  340. }
  341. }
  342. }

The JSON plan output produced by Terraform contains a lot of information. For this tutorial, we are interested in:

  • .resource_changes: array containing all the actions that terraform will apply on the infrastructure.
  • .resource_changes[].type: the type of resource (e.g. aws_instance, aws_iam, …)
  • .resource_changes[].change.actions: array of actions applied on the resource (create, update, delete…)

For more information about the JSON plan representation, please check the Terraform documentation.

3. Write the OPA policy to check the plan

The policy computes a score for a Terraform plan that combines

  • The number of deletions of each resource type
  • The number of creations of each resource type
  • The number of modifications of each resource type

The policy authorizes the plan when the score for the plan is below a threshold and there are no changes made to any IAM resources. (For simplicity, the threshold in this tutorial is the same for everyone, but in practice you would vary the threshold depending on the user.)

  1. package terraform.analysis
  2. import input as tfplan
  3. ########################
  4. # Parameters for Policy
  5. ########################
  6. # acceptable score for automated authorization
  7. blast_radius = 30
  8. # weights assigned for each operation on each resource-type
  9. weights = {
  10. "aws_autoscaling_group": {"delete": 100, "create": 10, "modify": 1},
  11. "aws_instance": {"delete": 10, "create": 1, "modify": 1}
  12. }
  13. # Consider exactly these resource types in calculations
  14. resource_types = {"aws_autoscaling_group", "aws_instance", "aws_iam", "aws_launch_configuration"}
  15. #########
  16. # Policy
  17. #########
  18. # Authorization holds if score for the plan is acceptable and no changes are made to IAM
  19. default authz = false
  20. authz {
  21. score < blast_radius
  22. not touches_iam
  23. }
  24. # Compute the score for a Terraform plan as the weighted sum of deletions, creations, modifications
  25. score = s {
  26. all := [ x |
  27. some resource_type
  28. crud := weights[resource_type];
  29. del := crud["delete"] * num_deletes[resource_type];
  30. new := crud["create"] * num_creates[resource_type];
  31. mod := crud["modify"] * num_modifies[resource_type];
  32. x := del + new + mod
  33. ]
  34. s := sum(all)
  35. }
  36. # Whether there is any change to IAM
  37. touches_iam {
  38. all := resources["aws_iam"]
  39. count(all) > 0
  40. }
  41. ####################
  42. # Terraform Library
  43. ####################
  44. # list of all resources of a given type
  45. resources[resource_type] = all {
  46. some resource_type
  47. resource_types[resource_type]
  48. all := [name |
  49. name:= tfplan.resource_changes[_]
  50. name.type == resource_type
  51. ]
  52. # number of creations of resources of a given type
  53. num_creates[resource_type] = num {
  54. resource_types[resource_type]
  55. all := resources[resource_type]
  56. creates := [res | res:= all[_]; res.change.actions[_] == "create"]
  57. num := count(creates)
  58. }
  59. # number of deletions of resources of a given type
  60. num_deletes[resource_type] = num {
  61. some resource_type
  62. resource_types[resource_type]
  63. all := resources[resource_type]
  64. deletions := [res | res:= all[_]; res.change.actions[_] == "delete"]
  65. num := count(deletions)
  66. }
  67. # number of modifications to resources of a given type
  68. num_modifies[resource_type] = num {
  69. some resource_type
  70. resource_types[resource_type]
  71. all := resources[resource_type]
  72. modifies := [res | res:= all[_]; res.change.actions[_] == "update"]
  73. num := count(modifies)
  74. }

To evaluate the policy against that plan, you hand OPA the policy, the Terraform plan as input, and ask it to evaluate data.terraform.analysis.authz.

  1. opa eval --format pretty --data terraform.rego --input tfplan.json "data.terraform.analysis.authz"
  1. data.terraform.analysis.authz

If you’re curious, you can ask for the score that the policy used to make the authorization decision. In our example, it is 11 (10 for the creation of the auto-scaling group and 1 for the creation of the server).

  1. opa eval --format pretty --data terraform.rego --input tfplan.json "data.terraform.analysis.score"
  1. data.terraform.analysis.score
  1. 11

If, as suggested in the previous step, you want to modify your policy to make an authorization decision based on both the user and the Terraform plan, the input you would give to OPA would take the form {"user": <user>, "plan": <plan>}, and your policy would reference the user with input.user and the plan with input.plan. You could even go so far as to provide the Terraform state file and the AWS EC2 data to OPA and write policy using all of that context.

5. Create a Large Terraform plan and Evaluate it

Create a Terraform plan that creates enough resources to exceed the blast-radius permitted by policy.

  1. cat >main.tf <<EOF
  2. provider "aws" {
  3. region = "us-west-1"
  4. }
  5. resource "aws_instance" "web" {
  6. instance_type = "t2.micro"
  7. ami = "ami-09b4b74c"
  8. }
  9. resource "aws_autoscaling_group" "my_asg" {
  10. availability_zones = ["us-west-1a"]
  11. name = "my_asg"
  12. max_size = 5
  13. min_size = 1
  14. health_check_grace_period = 300
  15. health_check_type = "ELB"
  16. desired_capacity = 4
  17. force_delete = true
  18. launch_configuration = "my_web_config"
  19. }
  20. resource "aws_launch_configuration" "my_web_config" {
  21. name = "my_web_config"
  22. image_id = "ami-09b4b74c"
  23. instance_type = "t2.micro"
  24. }
  25. resource "aws_autoscaling_group" "my_asg2" {
  26. availability_zones = ["us-west-2a"]
  27. name = "my_asg2"
  28. max_size = 6
  29. min_size = 1
  30. health_check_grace_period = 300
  31. health_check_type = "ELB"
  32. desired_capacity = 4
  33. force_delete = true
  34. launch_configuration = "my_web_config"
  35. }
  36. resource "aws_autoscaling_group" "my_asg3" {
  37. availability_zones = ["us-west-2b"]
  38. name = "my_asg3"
  39. max_size = 7
  40. min_size = 1
  41. health_check_grace_period = 300
  42. health_check_type = "ELB"
  43. desired_capacity = 4
  44. force_delete = true
  45. launch_configuration = "my_web_config"
  46. }
  47. EOF

Generate the Terraform plan and convert it to JSON.

  1. terraform init
  2. terraform plan --out tfplan_large.binary
  3. terraform show -json tfplan_large.binary > tfplan_large.json

Evaluate the policy to see that it fails the policy tests and check the score.

  1. opa eval --data terraform.rego --input tfplan_large.json "data.terraform.analysis.authz"
  2. opa eval --data terraform.rego --input tfplan_large.json "data.terraform.analysis.score"

6. (Optional) Run OPA as a daemon and evaluate policy

In addition to running OPA from the command-line, you can run it as a daemon loaded with the Terraform policy and then interact with it using its HTTP API. First, start the daemon:

  1. opa run -s terraform.rego

Then in a separate terminal, use OPA’s HTTP API to evaluate the policy against the two Terraform plans.

  1. curl localhost:8181/v0/data/terraform/analysis/authz -d @tfplan.json
  2. curl localhost:8181/v0/data/terraform/analysis/authz -d @tfplan_large.json

Wrap Up

Congratulations on finishing the tutorial!

You learned a number of things about Terraform Testing with OPA:

  • OPA gives you fine-grained policy control over Terraform plans.
  • You can use data other than the plan itself (e.g. the user) when writing authorization policies.

Keep in mind that it’s up to you to decide how to use OPA’s Terraform tests and authorization decision. Here are some ideas.

  • Add it as part of your Terraform wrapper to implement unit tests on Terraform plans
  • Use it to automatically approve run-of-the-mill Terraform changes to reduce the burden of peer-review
  • Embed it into your deployment system to catch problems that arise when applying Terraform to production after applying it to staging

Working with Modules

Create a new Terraform file that includes a security group and a security group from a module. (This example uses the module from https://github.com/terraform-aws-modules/terraform-aws-security-group)

  1. cat >main.tf <<EOF
  2. provider "aws" {
  3. region = "us-east-1"
  4. }
  5. data "aws_vpc" "default" {
  6. default = true
  7. }
  8. module "http_sg" {
  9. source = "git::https://github.com/terraform-aws-modules/terraform-aws-security-group.git?ref=v3.10.0"
  10. name = "http-sg"
  11. description = "Security group with HTTP ports open for everybody (IPv4 CIDR), egress ports are all world open"
  12. vpc_id = data.aws_vpc.default.id
  13. ingress_cidr_blocks = ["0.0.0.0/0"]
  14. }
  15. resource "aws_security_group" "allow_tls" {
  16. name = "allow_tls"
  17. description = "Allow TLS inbound traffic"
  18. vpc_id = data.aws_vpc.default.id
  19. ingress {
  20. description = "TLS from VPC"
  21. from_port = 443
  22. to_port = 443
  23. protocol = "tcp"
  24. cidr_blocks = ["10.0.0.0/8"]
  25. }
  26. egress {
  27. from_port = 0
  28. to_port = 0
  29. protocol = "-1"
  30. cidr_blocks = ["0.0.0.0/0"]
  31. }
  32. tags = {
  33. Name = "allow_tls"
  34. }
  35. }
  36. EOF

Then initialize Terraform and ask it to calculate what changes it will make and store the output in tfplan.binary.

  1. terraform init
  2. terraform plan --out tfplan.binary

2. Convert the new Terraform plan into JSON

Use the terraform show command to produce the JSON representation of the Terraform plan.

  1. terraform show -json tfplan.binary > tfplan2.json

3. Write the OPA policy to collect resources

The policy evaluates whether a security group is valid based on the contents of its description:

  • Resources can be specified under the root module or in child modules
  • We want to evaluate against the combined group of these resources
  • This example is scoped to the planned values (planned_values) section of the JSON representation

The policy uses the walk built-in function to explore the JSON structure, and uses conditions to filter for the specific paths where resources would be found.

terraform_module.rego:

  1. package terraform.module
  2. deny[msg] {
  3. desc := resources[r].values.description
  4. contains(desc, "HTTP")
  5. msg = sprintf("No security groups should be using HTTP. Resource in violation: %v",[r.address])
  6. }
  7. resources := { r |
  8. some path, value
  9. # Walk over the JSON tree and check if the node we are
  10. # currently on is a module (either root or child) resources
  11. # value.
  12. walk(input.planned_values, [path, value])
  13. # Look for resources in the current value based on path
  14. rs := module_resources(path, value)
  15. # Aggregate them into `resources`
  16. r := rs[_]
  17. }
  18. # Variant to match root_module resources
  19. module_resources(path, value) = rs {
  20. # Expect something like:
  21. #
  22. # {
  23. # "root_module": {
  24. # "resources": [...],
  25. # ...
  26. # }
  27. # ...
  28. # }
  29. #
  30. # Where the path is [..., "root_module", "resources"]
  31. reverse_index(path, 1) == "resources"
  32. reverse_index(path, 2) == "root_module"
  33. rs := value
  34. }
  35. # Variant to match child_modules resources
  36. module_resources(path, value) = rs {
  37. # Expect something like:
  38. #
  39. # {
  40. # ...
  41. # "child_modules": [
  42. # {
  43. # "resources": [...],
  44. # ...
  45. # },
  46. # ...
  47. # ]
  48. # ...
  49. # }
  50. #
  51. # Where the path is [..., "child_modules", 0, "resources"]
  52. # Note that there will always be an index int between `child_modules`
  53. # and `resources`. We know that walk will only visit each one once,
  54. # so we shouldn't need to keep track of what the index is.
  55. reverse_index(path, 1) == "resources"
  56. reverse_index(path, 3) == "child_modules"
  57. rs := value
  58. }
  59. reverse_index(path, idx) = value {
  60. value := path[count(path) - idx]
  61. }

To evaluate the policy against that plan, you hand OPA the policy, the Terraform plan as input, and ask it to evaluate data.terraform.module.deny.

  1. opa eval --format pretty --data terraform_module.rego --input tfplan2.json "data.terraform.module.deny"

This should return one of the two resources. The security group created by the module uses HTTP in its description and therefore fails the evaluation.

Module Wrap Up

Congratulations on finishing the tutorial!

You learned OPA can be used to determine if a proposed configuration is authorized.

Additional use cases might include:

  • Ensuring all resources have tags before they are created
  • Security or operational requirements