# Source: Gym/resources_servers/indirect_prompt_injection/configs/indirect_prompt_injection.yaml
# Repository: NVIDIA-NeMo/Gym (branch: main)
# Resources-server entry for the indirect prompt injection environment.
# The top-level key is the name that other entries in this file use to refer
# to this server.
indirect_prompt_injection_resources_server:
  resources_servers:
    indirect_prompt_injection:
      entrypoint: app.py
      domain: safety
      # NOTE(review): the meaning of `verified` is defined by the consuming
      # framework, not by this file — confirm before flipping it.
      verified: false
      # Folded scalars (>-) keep each value a single-line string at load time
      # while keeping the source lines short.
      description: >-
        Indirect prompt injection resistance for multi-domain tool-use
        agents
      value: >-
        Improve agentic security by teaching robustness against tool
        outputs containing malicious instructions
# Agent entry: a `simple_agent` under `responses_api_agents`, wired to the
# resources server defined in this file and to a model server named
# `policy_model`.
indirect_prompt_injection_simple_agent:
  responses_api_agents:
    simple_agent:
      entrypoint: app.py
      # Refers to the resources-server entry by its top-level config key.
      resources_server:
        type: resources_servers
        name: indirect_prompt_injection_resources_server
      # `policy_model` is not defined in this file — presumably supplied by
      # another config at launch time; confirm.
      model_server:
        type: responses_api_models
        name: policy_model
      # Presumably an upper bound on agent steps per rollout — confirm
      # against the agent implementation.
      max_steps: 5
      # Three dataset entries: example, train, validation. Only train and
      # validation carry a gitlab_identifier and a license.
      datasets:
        - name: example
          type: example
          jsonl_fpath: resources_servers/indirect_prompt_injection/data/example.jsonl
        - name: train
          type: train
          jsonl_fpath: resources_servers/indirect_prompt_injection/data/train.jsonl
          gitlab_identifier:
            dataset_name: indirect_prompt_injection
            # Quoted so the version is unambiguously a string.
            version: '0.0.1'
            artifact_fpath: train.jsonl
          license: 'Apache 2.0'
        - name: validation
          type: validation
          jsonl_fpath: resources_servers/indirect_prompt_injection/data/validation.jsonl
          num_repeats: 1
          gitlab_identifier:
            dataset_name: indirect_prompt_injection
            version: '0.0.1'
            # NOTE(review): the artifact is named val.jsonl while the local
            # file is validation.jsonl — confirm the mismatch is intentional.
            artifact_fpath: val.jsonl
          license: 'Apache 2.0'