<?xml version="1.0" encoding="UTF-8"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
    which is available here: http://xml.resource.org. -->

<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
    There has to be one entity for each item to be referenced.
    An alternate method (rfc include) is described in the references. -->

<!ENTITY RFC1918 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1918.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2544 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2544.xml">
<!ENTITY RFC3022 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3022.xml">
  <!ENTITY RFC4340 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4340.xml">
  <!ENTITY nbsp    "&#160;">
  <!ENTITY RFC4814 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4814.xml">
  <!ENTITY zwsp   "&#8203;">
  <!ENTITY RFC5180 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5180.xml">
  <!ENTITY nbhy   "&#8209;">
  <!ENTITY RFC6146 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6146.xml">
<!ENTITY RFC7599 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7599.xml">
<!ENTITY RFC8174 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8174.xml">
<!ENTITY RFC8219 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8219.xml">
<!ENTITY RFC9000 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.9000.xml">
<!ENTITY RFC9260 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.9260.xml">
<!ENTITY RFC9411 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.9411.xml">
<!ENTITY wj     "&#8288;">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
    please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
    (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
    (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->

<rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="info" docName="draft-ietf-bmwg-benchmarking-stateful-09" number="9693" consensus="true" ipr="trust200902" obsoletes="" updates="" submissionType="IETF" xml:lang="en" tocInclude="true" tocDepth="4" symRefs="true" sortRefs="true" version="3">

  <front>

<!-- The abbreviated title is used in the page header - it is only necessary if the
          full title is longer than 39 characters.

[rfced] As RFC 4814 is mentioned in this document's Abstract and
Introduction, may we remove the reference to it from the title?

Original:

    Benchmarking Methodology for Stateful NATxy Gateways using RFC 4814
                  Pseudorandom Port Numbers

Perhaps:

    Benchmarking Methodology for Stateful NATxy Gateways Using
                  Pseudorandom Port Numbers

-->

    <title abbrev="Benchmarking Stateful NATxy Gateways">Benchmarking
    Methodology for Stateful NATxy Gateways Using RFC 4814 Pseudorandom Port
    Numbers</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->
    <seriesInfo name="RFC" value="9693"/>
    <author fullname="Gábor Lencse" initials="G." surname="Lencse">
      <organization>Széchenyi István University</organization>
      <address>
        <postal>
          <street>Egyetem tér 1.</street>
          <!-- Reorder these if your country does things differently -->
          <city>Győr</city>
          <region></region>
          <code>H-9026</code>
          <country>Hungary</country>
        </postal>
        <phone></phone>
        <email>lencse@sze.hu</email>
        <uri></uri>
      </address>
    </author>
    <author fullname="Keiichi Shima" initials="K." surname="Shima">
      <organization>SoftBank Corp.</organization>
      <address>
        <postal>
          <street>1-7-1 Kaigan</street>
          <city>Minato-ku</city>
          <region>Tokyo</region>
          <code>105-7529</code>
          <country>Japan</country>
        </postal>
        <phone></phone>
        <email>shima@wide.ad.jp</email>
        <uri>https://softbank.co.jp/</uri>
      </address>
    </author>
    <date year="2024" month="December"/>

    <!-- Meta-data Declarations -->

    <area>OPS</area>
    <workgroup>bmwg</workgroup>

    <!-- WG name at the upperleft corner of the doc,
          IETF is fine for individual submissions.
    If this element is not present, the default is "Network Working Group",
          which is used by the RFC Editor as a nod to the history of the IETF. -->

    <keyword>Benchmarking</keyword>
    <keyword>Stateful NATxy</keyword>
    <keyword>Measurement Procedure</keyword>
    <keyword>Throughput</keyword>
    <keyword>Frame Loss Rate</keyword>
    <keyword>Latency</keyword>
    <keyword>PDV</keyword>

    <!-- Keywords will be incorporated into HTML output
          files in a meta tag but they have no effect on text or nroff
          output. If you submit your draft to the RFC Editor, the
          keywords will be used for the search engine. -->

    <abstract>
      <t>RFC 2544 defines a benchmarking methodology for network
      interconnect devices. RFC 5180 addresses IPv6 specificities, and it also
      provides a technology update but excludes IPv6 transition technologies.
      RFC 8219 addresses IPv6 transition technologies, including stateful
      NAT64. However, none of them discuss how to apply pseudorandom port
      numbers from RFC 4814 to any stateful NATxy (such as NAT44, NAT64, and NAT66)
      technologies.  This document discusses why using pseudorandom port
      numbers with stateful NATxy gateways is a difficult problem. It
      recommends a solution that limits the port number ranges and uses two
      test phases (phase 1 and phase 2). This document shows how the classic
      performance measurement procedures (e.g., throughput, frame loss rate,
      latency, etc.)  can be carried out.  New performance metrics and
      measurement procedures are also defined for measuring the maximum
      connection establishment rate, connection tear-down rate, and
      connection tracking table capacity.
      </t>
    </abstract>
  </front>

  <middle>
    <section anchor="intro" numbered="true" toc="default">
      <name>Introduction</name>

      <t><xref target="RFC2544" format="default"/> defines a comprehensive
      benchmarking methodology for network interconnect devices that is still
      in use. It is mainly independent of IP version, but it uses IPv4 in its
      examples.  <xref target="RFC5180" format="default"/> addresses IPv6
      specificities and also adds technology updates but declares IPv6
      transition technologies are out of its scope. <xref target="RFC8219"
      format="default"/> addresses the IPv6 transition technologies, including
      stateful NAT64. It reuses several benchmarking procedures from <xref
      target="RFC2544" format="default"/> (e.g., throughput, frame loss rate),
      and it redefines the latency measurement and adds further ones (e.g., the
      Packet Delay Variation (PDV) measurement).</t>

<!-- [rfced] Per Section 3.6 of RFC 7322 ("RFC Style Guide"), abbreviations
must be expanded upon first use. To avoid expanding "NAPT" upon first use
here and stacking multiple sets of parentheses, we have rephrased as
follows (because "NAPT" is introduced and expanded later in this document).
Please let us know of any objections.

Original:

   However, none of them discussed, how to apply [RFC4814] pseudorandom
   port numbers, when benchmarking stateful NATxy (NAT44 (also called
   NAPT) [RFC3022], NAT64 [RFC6146], and NAT66) gateways.  (It should be
   noted that stateful NAT66 is not an IETF specification but refers to
   an IPv6 version of the stateful NAT44 specification.)

Current:

   However, none of them discussed how to apply pseudorandom port numbers from
   [RFC4814] when benchmarking stateful NATxy gateways (such as NAT44
   [RFC3022], NAT64 [RFC6146], and NAT66). (It should be
   noted that stateful NAT66 is not an IETF specification but refers to
   an IPv6 version of the stateful NAT44 specification.)

-->

      <t>However, none of them discuss how to apply pseudorandom port
      numbers from <xref target="RFC4814" format="default"/> when benchmarking
      stateful NATxy gateways (such as NAT44 <xref
      target="RFC3022" format="default"/>, NAT64 <xref target="RFC6146"
      format="default"/>, and NAT66). (It should be noted that stateful NAT66
      is not an IETF specification but refers to an IPv6 version of the
      stateful NAT44 specification.) The authors are not aware of any other
      RFCs that address this question.
      </t>

      <t>First, this document discusses why using pseudorandom port numbers with
      stateful NATxy gateways is a difficult problem. Then, a solution is
      recommended.</t>

      <section numbered="true" toc="default">
        <name>Requirements Language</name>
        <t>The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>",
        "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL
        NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>",
        "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>",
        "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document
        are to be interpreted as described in BCP 14 <xref target="RFC2119"
        format="default"/> <xref target="RFC8174" format="default"/> when, and
        only when, they appear in all capitals, as shown here.</t>
      </section>
      <!-- [CHECK] The 'Requirements Language' section is optional -->

    </section>
    <section anchor="problem" numbered="true" toc="default">
      <name>Pseudorandom Port Numbers and Stateful Translation</name>

      <t>In its appendix, <xref target="RFC2544" format="default"/>
      defines a frame format for test frames, including specific source and
      destination port numbers.  <xref target="RFC4814" format="default"/>
      recommends using pseudorandom and uniformly distributed values for both
      source and destination port numbers. However, stateful NATxy (such as NAT44,
      NAT64, and NAT66) solutions use the port numbers to identify
      connections. The usage of pseudorandom port numbers causes different
      problems depending on the direction:
      </t>
      <ul spacing="normal">
        <li>
          <t>For the client-to-server direction, pseudorandom source and
          destination port numbers could be used; however, this approach would
          be a denial-of-service attack against the stateful NATxy gateway,
          because it would exhaust its connection tracking table capacity. To that end,
          let us see some calculations using the recommendations of
          <xref target="RFC4814" format="default"/>:
          </t>
          <ul spacing="normal">
            <li>
              <t>The recommended source port range is 1024-65535; thus, its size is 64512.</t>
            </li>
            <li>
              <t>The recommended destination port range is 1-49151; thus, its size is 49151.</t>
            </li>
            <li>
              <t>The number of source and destination port number combinations is 3,170,829,312.</t>
            </li>
          </ul>
          <t>It should be noted that the usage of different source and destination IP addresses
          further increases the number of connection tracking table entries.</t>
        </li>
        <li>
          <t>For the server-to-client direction, the stateful Device Under Test (DUT) would drop any
          packets that do not belong to an existing connection; therefore, the
          direct usage of pseudorandom port numbers from the ranges mentioned above
          is not feasible.</t>
        </li>
      </ul>
    </section>

    <section anchor="setup_term" numbered="true" toc="default">
      <name>Test Setup and Terminology</name>

      <t><xref target="RFC2544" sectionFormat="of" section="12"/> requires
      testing using a single protocol source and destination address pair
      first and then also using multiple protocol addresses. The same
      approach is followed: first, a single source and destination IP address
      pair is used, and then it is explained how to use multiple IP
      addresses.</t>

      <section anchor="setup_term_single" numbered="true" toc="default">
        <name>When Testing with a Single IP Address Pair</name>

        <t>The methodology works with any IP version to benchmark stateful
        NATxy gateways, where x and y are in {4, 6}. To facilitate an easy
        understanding, two typical examples are used: stateful NAT44 and
        stateful NAT64.</t>

        <t>The test setup for the well-known stateful NAT44 (also called
        Network Address and Port Translation (NAPT)) solution is shown in
        <xref target="test_setup_sfnat44" format="default"/>.</t>

        <t>Note that the private IP addresses from <xref target="RFC1918"
        format="default"/> are used to facilitate an easy understanding of the
        example, and the usage of the IP addresses reserved for benchmarking
        is absolutely legitimate.</t>

        <t keepWithNext="true"/>
        <figure anchor="test_setup_sfnat44">
          <name>Test Setup for Benchmarking Stateful NAT44 Gateways</name>
          <artwork align="left" name="" type="" alt=""><![CDATA[
              +--------------------------------------+
     10.0.0.2 |Initiator                    Responder| 198.19.0.2
+-------------|                Tester                |<------------+
| private IPv4|                         [state table]| public IPv4 |
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
|    10.0.0.1 |                 DUT:                 | 198.19.0.1  |
+------------>|        Stateful NAT44 gateway        |-------------+
  private IPv4|     [connection tracking table]      | public IPv4
              +--------------------------------------+
]]></artwork>

        <postamble></postamble>
        </figure>

        <t keepWithPrevious="true"/>
        <t>The test setup for the stateful NAT64 solution <xref target="RFC6146"
        format="default"/>, which is also widely used, is shown in
        <xref target="test_setup_sfnat64" format="default"/>.</t>

        <t keepWithNext="true"/>
        <figure anchor="test_setup_sfnat64">
          <name>Test Setup for Benchmarking Stateful NAT64 Gateways</name>
          <artwork align="left" name="" type="" alt=""><![CDATA[
              +--------------------------------------+
    2001:2::2 |Initiator                    Responder| 198.19.0.2
+-------------|                Tester                |<------------+
| IPv6 address|                         [state table]| IPv4 address|
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
|   2001:2::1 |                 DUT:                 | 198.19.0.1  |
+------------>|        Stateful NAT64 gateway        |-------------+
  IPv6 address|     [connection tracking table]      | IPv4 address
              +--------------------------------------+
]]></artwork>

        <postamble></postamble>
        </figure>
        <t keepWithPrevious="true"/>
        <t>As for the transport layer protocol, <xref target="RFC2544"
        format="default"/> recommended testing with UDP, and it was also kept
        in <xref target="RFC8219" format="default"/>. UDP is also kept for a
        general recommendation; thus, the port numbers in the following text
        are to be understood as UDP port numbers. The rationale and
        limitations of this approach are discussed in <xref
        target="udp_or_tcp" format="default"/>.</t>

        <t>The most important elements of the proposed benchmarking system are
        defined as follows:</t>

<!-- [rfced] In the sentence below, may we clarify "also in the case of
UDP using the same kind of entries as in the case of TCP" as follows?

Original:

   *  Connection: Although UDP itself is a connection-less protocol,
      stateful NATxy gateways keep track of their translation mappings
      in the form of a "connection" also in the case of UDP using the
      same kind of entries as in the case of TCP.

Perhaps:

   Connection:  Although UDP itself is a connectionless protocol,
      stateful NATxy gateways keep track of their translation mappings
      in the form of a "connection" as well as in the case of UDP using the
      same kind of entries as in TCP.

-->

        <dl newline="false" spacing="normal">
          <dt>Connection:</dt>
	  <dd>Although UDP itself is a connectionless protocol, stateful
	  NATxy gateways keep track of their translation mappings in the form
	  of a "connection" also in the case of UDP using the same kind of
	  entries as in the case of TCP.</dd>

          <dt>Connection tracking table:</dt>
          <dd>The stateful NATxy gateway uses a connection tracking table to
          be able to perform the stateful translation in the server-to-client
          direction. Its size, policy, and content are unknown to the
          Tester.</dd>

          <dt>Four tuple:</dt>
          <dd>The four numbers that identify a connection are source IP
          address, source port number, destination IP address, and destination
          port number.</dd>

          <dt>State table:</dt>
          <dd>The Responder of the Tester extracts the four tuple from each
          received test frame and stores it in its state table. A recommendation
          is given for the writing and reading order of the state table in <xref
          target="st_wr_order" format="default"/>.</dd>

          <dt>Initiator:</dt>
          <dd>The port of the Tester that may initiate a connection through
          the stateful DUT in the client-to-server direction. Theoretically,
          it can use any source and destination port numbers from the ranges
          recommended by <xref target="RFC4814" format="default"/>: if the
          used four tuple does not belong to an existing connection, the DUT
          will register a new connection into its connection tracking
          table.</dd>

          <dt>Responder:</dt>
          <dd>The port of the Tester that may not initiate a connection
          through the stateful DUT in the server-to-client direction. It may
          only send frames that belong to an existing connection. To that end,
          it uses four tuples that have been previously extracted from the
          received test frames and stored in its state table.</dd>

          <dt>Test phase 1:</dt>
          <dd>The test frames are sent only by the Initiator to the Responder
          through the DUT to fill both the connection tracking table of the
          DUT and the state table of the Responder. This is a newly introduced
          operation phase for stateful NATxy benchmarking. The necessity of
          this test phase is explained in <xref target="prelim"
          format="default"/>.</dd>

          <dt>Test phase 2:</dt>
          <dd>The measurement procedures defined by <xref target="RFC8219"
          format="default"/> (e.g., throughput, latency, etc.) are performed in
          this test phase after the completion of test phase 1. Test frames
          are sent as required (e.g., a bidirectional test or a unidirectional test
          in any of the two directions).</dd>
        </dl>

        <t>One further definition is used in the text of this document:</t>
        <dl newline="false" spacing="normal">
          <dt>Black box testing:</dt>
          <dd>A testing approach when the Tester is not aware of the
          details of the internal structure and operation of the DUT. It can
          send input to the DUT and observe the output of the DUT.</dd>
        </dl>
      </section>

      <section anchor="setup_term_multiple" numbered="true" toc="default">
        <name>When Testing with Multiple IP Addresses</name>

        <t>This section considers the number of the necessary and available IP
        addresses.</t>

        <t>In <xref target="test_setup_sfnat44" format="default"/>, the single
        198.19.0.1 IPv4 address is used on the WAN side port of the stateful
        NAT44 gateway. However, in practice, it is not a single IP address,
        but rather an IP address range that is assigned to the WAN side port
        of the stateful NAT44 gateways. Its required size depends on the
        number of client nodes and on the type of the stateful NAT44
        algorithm. (The traditional algorithm always replaces the source port
        number when a new connection is established. Thus, it requires a
        larger range than the extended algorithm, which replaces the source
        port number only when it is necessary. Please refer to Tables 1 and
        2 of <xref target="LEN2015" format="default"/>.)</t>

        <t>When router testing is done, <xref target="RFC2544"
        sectionFormat="of" section="12"/> requires testing using a
        single source and destination IP address pair first and then using
        destination IP addresses from 256 different networks. The 16-23 bits
        of the 198.18.0.0/24 and 198.19.0.0/24 addresses can be used to
        express the 256 networks.  As this document does not deal with router
        testing, no multiple destination networks are needed; therefore, these
        bits are available for expressing multiple IP addresses that belong to
        the same "/16" network. Moreover, both the 198.18.0.0/16 and the
        198.19.0.0/16 networks can be used on the right side of the test setup,
        as private IP addresses from the 10.0.0.0/16 network are used on its
        left side.</t>

        <t keepWithNext="true"/>
        <figure anchor="test_setup_sfnat44_multi">
          <name>Test Setup for Benchmarking Stateful NAT44 Gateways Using Multiple IPv4 Addresses</name>
          <artwork align="left" name="" type="" alt=""><![CDATA[
10.0.0.2/16  - 10.0.255.254/16      198.19.0.0/15 - 198.19.255.254/15
           \  +--------------------------------------+  /
            \ |Initiator                    Responder| /
+-------------|                Tester                |<------------+
| private IPv4|                         [state table]| public IPv4 |
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
| 10.0.0.1/16 |                 DUT:                 | public IPv4 |
+------------>|        Stateful NAT44 gateway        |-------------+
  private IPv4|     [connection tracking table]      | \
              +--------------------------------------+  \
                                   198.18.0.1/15 - 198.18.255.255/15
]]></artwork>

        <postamble></postamble>
        </figure>

        <t keepWithPrevious="true"/>
        <t>A possible solution for assigning multiple IPv4 addresses is shown
        in <xref target="test_setup_sfnat44_multi" format="default"/>. On the
        left side, the private IP address range is abundantly large. (The
        16-31 bits were used for generating nearly 64k potential different
        source addresses, but the 8-15 bits are also available if needed.) On
        the right side, the 198.18.0.0/15 network is used, and it was cut
        into two equal parts. (Asymmetric division is also possible, if
        needed.)</t>
        <t>It should be noted that these are the potential address ranges. The
        actual address ranges to be used are discussed in <xref
        target="restr_port_range" format="default"/>.</t>
        <t>In the case of stateful NAT64, a single "/64" IPv6 prefix contains
        a high number of bits to express different IPv6 addresses. <xref
        target="test_setup_sfnat64_multi" format="default"/> shows an example
        where bits 96-111 are used for that purpose.
        </t>
        <t keepWithNext="true"/>
        <figure anchor="test_setup_sfnat64_multi">
          <name>Test Setup for Benchmarking Stateful NAT64 Gateways Using
          Multiple IPv6 and IPv4 Addresses</name>
          <artwork align="left" name="" type="" alt=""><![CDATA[
2001:2::[0000-ffff]:0002/64       198.19.0.0/15 - 198.19.255.254/15
           \  +--------------------------------------+  /
  IPv6      \ |Initiator                    Responder| /
+-------------|                Tester                |<------------+
| addresses   |                         [state table]| public IPv4 |
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
| 2001:2::1/64|                 DUT:                 | public IPv4 |
+------------>|        Stateful NAT64 gateway        |-------------+
 IPv6 address |     [connection tracking table]      | \
              +--------------------------------------+  \
                                   198.18.0.1/15 - 198.18.255.255/15
]]></artwork>

        <postamble></postamble>
        </figure>
        <t keepWithPrevious="true"/>
      </section>
    </section>

    <section anchor="method" numbered="true" toc="default">
      <name>Recommended Benchmarking Method</name>
      <section anchor="restr_port_range" numbered="true" toc="default">
        <name>Restricted Number of Network Flows</name>
        <t>When a single IP address pair is used for testing, then the number
        of network flows is determined by the number of source port number and
        destination port number combinations.</t>

<!-- [rfced] For clarity, may we update "in the order of a few times ten
thousand" to "in the order of a few tens of thousands"?

Original:

   If it is possible, the size of the source port number range SHOULD be
   larger (e.g. in the order of a few times ten thousand), whereas the size of
   the destination port number range SHOULD be smaller (may vary from a few to
   several hundreds or thousands as needed).

Perhaps:

   If it is possible, the size of the source port number range SHOULD be
   larger (e.g., in the order of a few tens of thousands), whereas the size of
   the destination port number range SHOULD be smaller (may vary from a few to
   several hundreds or thousands as needed).

-->
        <t>The Initiator <bcp14>SHOULD</bcp14> use restricted ranges for
        source and destination port numbers to avoid the exhaustion of the
        connection tracking table capacity of the DUT as described in <xref
        target="problem" format="default"/>.  If it is possible, the size of
        the source port number range <bcp14>SHOULD</bcp14> be larger (e.g., in
        the order of a few times ten thousand), whereas the size of the
        destination port number range <bcp14>SHOULD</bcp14> be smaller (e.g., it may
        vary from a few to several hundreds or thousands as needed).  The
        rationale is that source and destination port numbers that can be
        observed in the Internet traffic are not symmetrical. Whereas source
        port numbers may be random, there are a few very popular destination
        port numbers (e.g., 443 or 80; see <xref target="IIR2020"
        format="default"/>), and others hardly occur. Additionally, it was found that
        their role is also asymmetric in the Linux kernel routing hash
        function <xref target="LEN2020" format="default"/>.</t>
        <t>However, in some special cases, the size of the source port range
        is limited. For example, when benchmarking the Customer Edge (CE) and
        Border Relay (BR) of a Mapping of Address and Port using Translation
        (MAP-T) system <xref target="RFC7599" format="default"/> together (as
        a compound system performing stateful NAT44), then the source port
        range is limited to the number of source port numbers assigned to each
        subscriber. (It could be as low as 2048 ports.)</t>

<!-- [rfced] FYI - To improve readability, we have reformatted the text below
to read as a bulleted list. Please let us know any objections.

Original:

   When multiple IP addresses are used, then the port number ranges
   should be even more restricted, as the number of potential network
   flows is the product of the size of the source IP address range, the
   size of the source port number range, the size of the destination IP
   address range, and the size of the destination port number range.

Current:

   When multiple IP addresses are used, then the port number ranges
   should be even more restricted, as the number of potential network
   flows is the product of the size of:

   *  the source IP address range,

   *  the source port number range,

   *  the destination IP address range, and

   *  the destination port number range.

-->

	<t>When multiple IP addresses are used, then the port number ranges
        should be even more restricted, as the number of potential network
        flows is the product of the size of:</t>
	<ul>
	  <li>the source IP address range,</li>
	  <li>the source port number range,</li>
	  <li>the destination IP address range, and</li>
	  <li>the destination port number range.</li>
	</ul>
	<t>In addition, the recommended method requires the enumeration of all
	their possible combinations in test phase 1 as described in <xref
	target="ctrl_conntrack" format="default"/>.</t>
        <t>The number of network flows can be used as a parameter. The
        performance of the stateful NATxy gateway <bcp14>MAY</bcp14> be
        examined as a function of this parameter as described in <xref
        target="sc_net_flows" format="default"/>.</t>
      </section>

      <section anchor="prelim" numbered="true" toc="default">
        <name>Test Phase 1</name>
        <t>Test phase 1 serves two purposes:</t>

<!-- [rfced] How may we clarify "that is throughput" in the text below?

Original:

   Test phase 1 serves two purposes:

   1.  The connection tracking table of the DUT is filled.  It is
       important, because its maximum connection establishment rate may
       be lower than its maximum frame forwarding rate (that is
       throughput).

Perhaps:

   Test phase 1 serves two purposes:

   1.  The connection tracking table of the DUT is filled. This is important
       because its maximum connection establishment rate may be lower than its
       maximum frame forwarding rate (that is, its throughput).

-->

        <ol spacing="normal" type="1">
          <li>
            <t>The connection tracking table of the DUT is filled. This is
            important because its maximum connection establishment rate may
            be lower than its maximum frame forwarding rate (that is, its
            throughput).</t>
          </li>
          <li>
            <t>The state table of the Responder is filled with valid four
            tuples. It is a precondition for the Responder to be able to
            transmit frames that belong to connections that exist in the
            connection tracking table of the DUT.</t>
          </li>
        </ol>

        <t>Whereas the above two things are always necessary before test phase
        2, test phase 1 can be used without test phase 2. This is done when
        the maximum connection establishment rate is measured (as described in
        <xref target="meas_max_conn_est_rate" format="default"/>).</t>

        <t>Test phase 1 <bcp14>MUST</bcp14> be performed before all tests are
        performed in test phase 2. The following things happen in test phase
        1:</t>

        <ol spacing="normal" type="1">
          <li>
            <t>The Initiator sends test frames to the Responder through the
            DUT at a specific frame rate.</t>
          </li>
          <li>
            <t>The DUT performs the stateful translation of the test frames,
            and it also stores the new connections in its connection tracking
            table.</t>
          </li>
          <li>
            <t>The Responder receives the translated test frames and updates
            its state table with the received four tuples. The Responder
            transmits no test frames during test phase 1.</t>
          </li>
        </ol>
	<t>When test phase 1 is performed in preparation for test phase 2, the
        applied frame rate <bcp14>SHOULD</bcp14> be safely lower than the
        maximum connection establishment rate. (It implies that maximum
        connection establishment rate measurement <bcp14>MUST</bcp14> be
        performed first.)  Please refer to <xref target="ctrl_conntrack"
        format="default"/> for further conditions regarding timeout and the
        enumeration of all possible four tuples.</t>
      </section>

      <section anchor="consider_stateful" numbered="true" toc="default">
        <name>Consideration of the Cases of Stateful Operation</name>
        <t>The authors consider the most important events that may happen
        during the operation of a stateful NATxy gateway and the Actions of
        the gateway as follows.</t>

        <ol>
          <li>
            <t>EVENT: A packet not belonging to an existing connection arrives
            in the client-to-server direction.</t>
            <t>ACTION: A new connection is registered into the connection
            tracking table, and the packet is translated and forwarded.</t>
          </li>
          <li>
            <t>EVENT: A packet not belonging to an existing connection
            arrives in the server-to-client direction.</t>
            <t>ACTION: The packet is discarded.</t>
          </li>
          <li>
            <t>EVENT: A packet belonging to an existing connection arrives
            (in any direction).</t>
            <t>ACTION: The packet is translated and forwarded, and the
            timeout counter of the corresponding connection tracking table
            entry is reset.</t>
          </li>
          <li>
            <t>EVENT: A connection tracking table entry times out.</t>
            <t>ACTION: The entry is deleted from the connection tracking
            table.</t>
          </li>
        </ol>

      	<t>Due to "black box" testing, the Tester is not able to directly
      	examine (or delete) the entries of the connection tracking
      	table. However, the entries can be and <bcp14>MUST</bcp14> be
      	controlled by setting an appropriate timeout value and carefully
      	selecting the port numbers of the packets (as described in <xref
      	target="ctrl_conntrack" format="default"/>) to be able to produce
      	meaningful and repeatable measurement results.</t>
        <t>This document aims to support the measurement of the following
        performance characteristics of a stateful NATxy gateway:</t>
        <ul spacing="normal">
          <li>
            <t>maximum connection establishment rate</t>
          </li>
          <li>
            <t>all "classic" performance metrics like throughput, frame loss rate, latency, etc.</t>
          </li>
          <li>
            <t>connection tear-down rate</t>
          </li>
          <li>
            <t>connection tracking table capacity</t>
          </li>
        </ul>
      </section>

      <section anchor="ctrl_conntrack" numbered="true" toc="default">
        <name>Control of the Connection Tracking Table Entries</name>
        <t>It is necessary to control the connection tracking table entries of
        the DUT to achieve clear conditions for the measurements. One can
        simply achieve the following two extreme situations:</t>

        <ol spacing="normal">
          <li>
            All frames create a new entry in the connection tracking table
            of the DUT, and no old entries are deleted during the test. This is
            required for measuring the maximum connection establishment
            rate.
          </li>
          <li>
            No new entries are created in the connection tracking table of
            the DUT, and no old ones are deleted during the test. This is ideal
            for the measurements to be executed in phase 2, like throughput,
            latency, etc.
          </li>
        </ol>

        <t>From this point, the following two assumptions are used:</t>

        <ol spacing="normal" type="1">
          <li anchor="assumption1">
            The connection tracking table of the stateful NATxy is large
            enough to store all connections defined by the different four
            tuples.
          </li>
          <li anchor="assumption2">
            Each experiment is started with an empty connection tracking
            table. (This can be ensured by deleting its content before the
            experiment.)
          </li>
        </ol>

        <t>The first extreme situation can be achieved by:</t>
        <ul spacing="normal">
          <li>
            <t>using different four tuples for every single test frame in test phase 1 and</t>
          </li>
          <li>
            <t>setting the UDP timeout of the NATxy gateway to a value higher
            than the length of test phase 1.</t>
          </li>
        </ul>
        <t>The second extreme situation can be achieved by:</t>

        <ul spacing="normal">
          <li>
            <t>enumerating all possible four tuples in test phase 1 and</t>
          </li>
          <li>
            <t>setting the UDP timeout of the NATxy gateway to a value higher
            than the length of test phase 1 plus the gap between the two
            phases plus the length of test phase 2.</t>
          </li>
        </ul>

<!--[rfced] As "REQUIRES" is not a key word per RFCs 2119/8174, may we
rephrase this sentence to use "REQUIRED"?

Original:

   [RFC4814] REQUIRES pseudorandom port numbers, which the authors
   believe is a good approximation of the distribution of the source
   port numbers a NATxy gateway on the Internet may face with.

Perhaps:

   As described in [RFC4814], pseudorandom port numbers are REQUIRED,
   which the authors believe is a good approximation of the distribution
   of the source port numbers a NATxy gateway on the Internet may face with.

-->

        <t><xref target="RFC4814" format="default"/> REQUIRES pseudorandom
        port numbers, which the authors believe is a good approximation of the
        distribution of the source port numbers a NATxy gateway on the
        Internet may be faced with.
        </t>

<!-- [rfced] For clarity, how may we rephrase "it may be computing efficiently
generated by preparing" in the text below?

Original:

   It may be computing efficiently generated by preparing a
   random permutation of the previously enumerated all possible four
   tuples using Durstenfeld's random shuffle algorithm [DUST1964].

Perhaps:

   Efficient computing may be generated by preparing a
   random permutation of the previously enumerated all possible four
   tuples using Durstenfeld's random shuffle algorithm [DUST1964].

-->

        <t>Although the enumeration of all possible four tuples is not a
        requirement for the first extreme situation and the usage of
        different four tuples in test phase 1 is not a requirement for the
        second extreme situation, pseudorandom
        enumeration of all possible four tuples in test phase 1 is a good
        solution in both cases. Such an enumeration may be generated in a
        computationally efficient way by preparing a random permutation of
        all the previously enumerated possible four tuples using
        Durstenfeld's random shuffle algorithm <xref
        target="DUST1964" format="default"/>.</t>

        <t>The enumeration of the four tuples in increasing or decreasing
        order (or in any other specific order) <bcp14>MAY</bcp14> be used as
        an additional measurement.</t>

      </section>

      <section anchor="meas_max_conn_est_rate" numbered="true" toc="default">
        <name>Measurement of the Maximum Connection Establishment Rate</name>
        <t>The maximum connection establishment rate is an important
        characteristic of the stateful NATxy gateway, and its determination is
        necessary for the safe execution of test phase 1 (without frame loss)
        before test phase 2.
        </t>
        <t>The measurement procedure of the maximum connection establishment
        rate is very similar to the throughput measurement procedure defined
        in <xref target="RFC2544" format="default"/>.
        </t>

<!-- [rfced] FYI - We have reformatted the text below to read as a bulleted
list to improve readability. Please review and let us know of any objections.

Original:

   Procedure: The Initiator sends a specific number of test frames using
   all different four tuples at a specific rate through the DUT.  The
   Responder counts the frames that are successfully translated by the
   DUT.  If the count of offered frames is equal to the count of
   received frames, the rate of the offered stream is raised and the
   test is rerun.  If fewer frames are received than were transmitted,
   the rate of the offered stream is reduced and the test is rerun.

Current:

   The procedure is as follows:

   *  The Initiator sends a specific number of test frames using all
      different four tuples at a specific rate through the DUT.

   *  The Responder counts the frames that are successfully translated
      by the DUT.

   *  If the count of offered frames is equal to the count of received
      frames, the rate of the offered stream is raised and the test is
      rerun.

   *  If fewer frames are received than were transmitted, the rate of
      the offered stream is reduced and the test is rerun.

-->

        <t>The procedure is as follows:</t>
	<ul>
          <li>The Initiator sends a specific number of test frames using all
          different four tuples at a specific rate through the DUT.</li>
	  <li>The Responder counts the frames that are successfully translated
	  by the DUT.</li>
	  <li>If the count of offered frames is equal to the count of received
	  frames, the rate of the offered stream is raised and the test is
	  rerun.</li>
	  <li>If fewer frames are received than were transmitted, the rate of
	  the offered stream is reduced and the test is rerun.</li>
	</ul>

        <t>The maximum connection establishment rate is the fastest rate at
        which the count of test frames successfully translated by the DUT is
        equal to the number of test frames sent to it by the Initiator.
        </t>

<!-- [rfced] Please review whether any of the notes in this document
should be in the <aside> element. It is defined as "a container for
content that is semantically less important or tangential to the
content that surrounds it"
(https://authors.ietf.org/en/rfcxml-vocabulary#aside).
-->

        <t>Note: In practice, the usage of binary search is
        <bcp14>RECOMMENDED</bcp14>.</t>
      </section>
      <section anchor="validation_of_conn" numbered="true" toc="default">
        <name>Validation of Connection Establishment</name>
        <t>Due to "black box" testing, the entries of the connection tracking
        table of the DUT may not be directly examined. However, the presence of the
        connections can be checked easily by sending frames from the Responder
        to the Initiator in test phase 2 using all four tuples stored in the
        state table of the Tester (at a low enough frame rate). The arrival of
        all test frames indicates that the connections are indeed present.
        </t>

        <t>The procedure is as follows:</t>
          <t>When all the desired N number of test frames are sent by the
          Initiator to the Receiver at frame rate R in test phase 1 for the
          maximum connection establishment rate measurement and the Receiver
          has successfully received all the N frames, the establishment
          of the connections is checked in test phase 2 as follows:</t>
          <ul>
            <li>
              The Responder sends test frames to the Initiator at frame rate
              r=R*alpha for the duration of N/r, using a different four tuple
              from its state table for each test frame.
            </li>

            <li>
              The Initiator counts the received frames, and if all N frames
              have arrived, then the R frame rate of the maximum connection
              establishment rate measurement (performed in test phase 1) is
              raised for the next iteration; otherwise, it is lowered (as well as in
              the case that test frames were missing in the preliminary test
              phase).
            </li>
         </ul>

	  <t>Notes:</t>
          <ul spacing="normal">
            <li>
              The alpha is a kind of "safety factor"; it aims to make sure
              that the frame rate used for the validation is not too high, and the
              test may fail only if at least one connection is not
              present in the connection tracking table of the DUT. (Therefore, alpha
              should be typically less than 1, e.g., 0.8 or 0.5.)
            </li>
            <li>
              The duration of N/r and the frame rate of r means that N frames
              are sent for validation.
            </li>
            <li>
              The order of four tuple selection is arbitrary, provided that
              all four tuples <bcp14>MUST</bcp14> be used.
            </li>
            <li>
              Please refer to <xref target="meas_contr_capacity"
              format="default"/> for a short analysis of the operation of the
              measurement and what problems may occur.
            </li>
          </ul>

      </section>

      <section anchor="real_test" numbered="true" toc="default">
        <name>Test Phase 2</name>

        <t>As for the traffic direction, there are three possible cases
        during test phase 2:</t>

        <ol spacing="normal" type="1">
          <li>
            <t>Bidirectional traffic: The Initiator sends test frames to the
            Responder, and the Responder sends test frames to the
            Initiator.</t>
          </li>
          <li>
            <t>Unidirectional traffic from the Initiator to the Responder: The
            Initiator sends test frames to the Responder, but the Responder
            does not send test frames to the Initiator.</t>
          </li>
          <li>
            <t>Unidirectional traffic from the Responder to the Initiator: The
            Responder sends test frames to the Initiator, but the Initiator
            does not send test frames to the Responder.</t>
          </li>
        </ol>

        <t>If the Initiator sends test frames, then it uses pseudorandom
        source port numbers and destination port numbers from the restricted
        port number ranges. (If it uses multiple source and/or destination IP
        addresses, then their ranges are also limited.)  The Responder
        receives the test frames, updates its state table, and processes the
        test frames as required by the given measurement procedure (e.g., only
        counts them for the throughput test, handles timestamps for latency or
        PDV tests, etc.).</t>

        <t>If the Responder sends test frames, then it uses the four tuples
        from its state table. The reading order of the state table may follow
        different policies (discussed in <xref target="st_wr_order"
        format="default"/>). The Initiator receives the test frames and
        processes them as required by the given measurement procedure.</t>

        <t>As for the actual measurement procedures, the usage of the updated
        ones from <xref target="RFC8219" sectionFormat="of" section="7"/> is
        <bcp14>RECOMMENDED</bcp14>.</t>
      </section>

      <section anchor="meas_conn_tear_down_rate" numbered="true" toc="default">
        <name>Measurement of the Connection Tear-Down Rate</name>
        <t>Connection tear-down can cause significant load for the NATxy
        gateway.  The connection tear-down performance can be measured as
        follows:</t>
        <ol spacing="normal" type="1">
          <li>Load a certain number of connections (N) into the connection
          tracking table of the DUT (in the same way as done to measure the
          maximum connection establishment rate).</li>
          <li>Record TimestampA.</li>
          <li>Delete the content of the connection tracking table of the
          DUT.</li>
          <li>Record TimestampB.</li>
        </ol>

        <t>The connection tear-down rate can be computed as:</t>

        <t indent="5">connection tear-down rate = N / ( TimestampB - TimestampA)</t>

        <t>The connection tear-down rate <bcp14>SHOULD</bcp14> be measured for
        various values of N.</t>
        <t>It is assumed that the content of the connection tracking table may
        be deleted by an out-of-band control mechanism specific to the given
        NATxy gateway implementation (e.g., by removing the appropriate kernel
        module under Linux).</t>
        <t>It is noted that the performance of removing the entire content of
        the connection tracking table at one time may be different from
        removing all the entries one by one.</t>
      </section>

      <section anchor="meas_contr_capacity" numbered="true" toc="default">
        <name>Measurement of the Connection Tracking Table Capacity</name>
        <t>The connection tracking table capacity is an important metric of
        stateful NATxy gateways. Its measurement is not easy, because an
        elementary step of a validated maximum connection establishment rate
        measurement (defined in <xref target="validation_of_conn"
        format="default"/>) may have only a few distinct observable outcomes,
        but some of them may have different root causes:</t>
        <ul spacing="normal">
          <li>
            <t>During test phase 1, the number of test frames received by the
            Responder is less than the number of test frames sent by the
            Initiator.  It may have different root causes, including:</t>
            <ul spacing="normal">
              <li>
                <t>The R frame sending rate was higher than the maximum
                connection establishment rate. (Note that now the maximum
                connection establishment rate is considered unknown because
                one cannot measure the maximum connection establishment
                without <xref target="assumption1" format="none">assumption 1</xref> in <xref target="ctrl_conntrack"
                format="default"/>.)  This root cause may be eliminated by
                lowering the R rate and re-executing the test. (This step may
                be performed multiple times while R&gt;0.)</t>
              </li>
              <li>
                <t>The capacity of the connection tracking table of the DUT
                has been exhausted (and either the DUT does not want to
                delete connections or the deletion of the connections makes it
                slower; this case is not investigated further in test phase
                1).</t>
              </li>
            </ul>
          </li>
          <li>
            <t>During test phase 1, the number of test frames received by the
            Responder equals the number of test frames sent by the Initiator.
            In this case, the connections are validated in test phase 2.  The
            validation may have two kinds of observable results:</t>
            <ol spacing="normal" type="1">
              <li>
                <t>The number of validation frames received by the Initiator
                equals the number of validation frames sent by the Responder.
                (It proves that the capacity of the connection tracking table
                of the DUT is enough and both R and r were chosen
                properly.)</t>
              </li>
              <li>
                <t>The number of validation frames received by the Initiator
                is less than the number of validation frames sent by the
                Responder.  This phenomenon may have various root causes:</t>
                <ul spacing="normal">
                  <li>
                    <t>The capacity of the connection tracking table of the
                    DUT has been exhausted. (It does not matter whether some
                    existing connections are discarded and new ones are
                    stored or if the new connections are discarded.  Some
                    connections are lost anyway, and it makes validation
                    fail.)</t>
                  </li>
                  <li>
                    <t>The R frame sending rate used by the Initiator was too
                    high in test phase 1; thus, some connections were not
                    established even though all test frames arrived at the
                    Responder. This root cause may be eliminated by lowering
                    the R rate and re-executing the test.  (This step may be
                    performed multiple times while R&gt;0.)</t>
                  </li>
                  <li>
                    <t>The r frame sending rate used by the Responder was too
                    high in test phase 2; thus, some test frames did not
                    arrive at the Initiator even though all connections were
                    present in the connection tracking table of the DUT.  This
                    root cause may be eliminated by lowering the r rate and
                    re-executing the test.  (This step may be performed
                    multiple times while r&gt;0.)</t>
                  </li>
                </ul>
                <t>This is the problem: As the above three root causes are
                indistinguishable, it is not easy to decide whether R or r
                should be decreased.</t>
              </li>
            </ol>
          </li>
        </ul>
        <t>Experience shows that the DUT may collapse if its memory is
        exhausted.  Such a situation may make the connection tracking table
        capacity measurements rather inconvenient. This possibility is
        included in the recommended measurement procedure, but the detection
        and elimination of such a situation is not addressed (e.g., how the
        algorithm can reset the DUT).</t>
        <t>For the connection tracking table size measurement, first, one needs
        a safe number: C0. It is a precondition that C0 number of connections
        can surely be stored in the connection tracking table of the
        DUT. Using C0, one can determine the maximum connection establishment
        rate using C0 number of connections.  It is done with a binary search
        using validation. The result is R0. The values C0 and R0 will serve as
        "safe" starting values for the following two searches.</t>
	<t>First, an exponential search is performed to find the order of
	magnitude of the connection tracking table capacity. The search stops
	if the DUT collapses OR the maximum connection establishment rate
	severely drops (e.g., to its one tenth) due to doubling the number of
	connections.</t>
        <t>Then, the result of the exponential search gives the order of
        magnitude of the size of the connection tracking table. Before
        disclosing the possible algorithms to determine the exact size of the
        connection tracking table, three possible replacement policies for the
        NATxy gateway are considered:</t>
        <ol spacing="normal" type="1">
          <li>
            <t>The gateway does not delete any live connections until their timeout expires.</t>
          </li>
          <li>
            <t>The gateway replaces the live connections according to the Least Recently Used (LRU) policy.</t>
          </li>
          <li>
            <t>The gateway does a garbage collection when its connection
            tracking table is full and a frame with a new four tuple
            arrives. During the garbage collection, it deletes the K
            LRU connections, where K is greater than 1.</t>
          </li>
        </ol>
        <t>Now, it is examined what happens and how many validation frames
        arrive in the three cases.  Let the size of the connection tracking
        table be S and the number of preliminary frames be N, where S is less
        than N.</t>
        <ol spacing="normal" type="1">
          <li>
            <t>The connections defined by the first S test frames are
            registered into the connection tracking table of the DUT, and
            the last N-S connections are lost.  (It is another question if the
            last N-S test frames are translated and forwarded in test phase 1
            or simply dropped.) During validation, the validation frames with
            four tuples corresponding to the first S test frames will arrive
            at the Initiator and the other N-S validation frames will be
            lost.</t>
          </li>
          <li>
            <t>All connections are registered into the connection tracking
            table of the DUT, but the first N-S connections are replaced (and
            thus lost). During validation, the validation frames with four
            tuples corresponding to the last S test frames will arrive at the
            Initiator, and the other N-S validation frames will be lost.</t>
          </li>
          <li>
            <t>Depending on the values of K, S, and N, maybe less than S
            connections will survive.  In the worst case, only S-K+1
            validation frames arrive, even though the size of the connection
            tracking table is S.</t>
          </li>
        </ol>

        <t>If one knows that the stateful NATxy gateway uses the first or
        second replacement policy and one also knows that both R and r rates
        are low enough, then the final step of determining the size of the
        connection tracking table is simple. If the Responder sent N
        validation frames and the Initiator received N' of them, then the size
        of the connection tracking table is N'.</t>

        <t>In the general case, a binary search is performed to find the exact
        value of the connection tracking table capacity within E error. The
        search chooses the lower half of the interval if the DUT collapses OR
        the maximum connection establishment rate severely drops (e.g., to its
        half); otherwise, it chooses the higher half.  The search stops if the
        size of the interval is less than the E error.</t>

        <t>The algorithms for the general case are defined using C-like
        pseudocode in <xref target="meas_contr_capacity_algo"
        format="default"/>. In practice, this algorithm may be made more
        efficient in the way that the binary search for the maximum connection
        establishment rate stops if an elementary test fails at a rate under
        RS*beta or RS*gamma during the external search or during the final
        binary search for the capacity of the connection tracking table,
        respectively. (This saves a high amount of execution time by
        eliminating the long-lasting tests at low rates.)
        </t>
        <figure anchor="meas_contr_capacity_algo">
          <name>Measurement of the Connection Tracking Table Capacity</name>
          <sourcecode type="pseudocode"><![CDATA[
// The binarySearchForMaximumConnectionCstablishmentRate(c,r)
// function performs a binary search for the maximum connection
// establishment rate in the [0, r] interval using c number of
// connections.

// This is an exponential search for finding the order of magnitude
// of the connection tracking table capacity
// Variables:
//   C0 and R0 are beginning safe values for the connection
//     tracking table size and connection establishment rate,
//     respectively
//   CS and RS are their currently used safe values
//   CT and RT are their values for the current examination
//   beta is a factor expressing an unacceptable drop in R (e.g.,
//     beta=0.1)
//   maxrate is the maximum frame rate for the media
// Measure the starting rate R0 with C0 connections in [0, maxrate].
R0=binarySearchForMaximumConnectionCstablishmentRate(C0,maxrate);
// CS and RS advance to the examined values only after a pass
// that did not break out of the loop.
for ( CS=C0, RS=R0; 1; CS=CT, RS=RT )
{
  // Examine twice the currently safe number of connections.
  CT=2*CS;
  // The rate search is bounded from above by the safe rate RS.
  RT=binarySearchForMaximumConnectionCstablishmentRate(CT,RS);
  // Stop at the first unacceptable result; CS and RS still hold
  // the last safe values at this point.
  if ( DUT_collapsed || RT < RS*beta )
    break;
}
// At this point, the size of the connection tracking table is
// between CS and CT.

// This is the final binary search for finding the connection
// tracking table capacity within E error
// Variables:
//   CS and RS are the safe values for connection tracking table size
//     and connection establishment rate, respectively
//   C and R are the values for the current examination
//   gamma is a factor expressing an unacceptable drop in R
//     (e.g., gamma=0.5)
// D is the size of the remaining interval; iterate while D > E.
for ( D=CT-CS;  D>E; D=CT-CS )
{
  // Examine the midpoint of the [CS, CT] interval.
  C=(CS+CT)/2;
  R=binarySearchForMaximumConnectionCstablishmentRate(C,RS);
  if ( DUT_collapsed || R < RS*gamma )
    CT=C; // take the lower half of the interval
  else
    CS=C,RS=R; // take the upper half of the interval
}
// At this point, the size of the connection tracking table is
// CS within E error.
]]></sourcecode>

        </figure>
        <t keepWithPrevious="true"/>
      </section>

      <section anchor="st_wr_order" numbered="true" toc="default">
        <name>Writing and Reading Order of the State Table</name>
        <t>As for the writing policy of the state table of the Responder,
        round robin is <bcp14>RECOMMENDED</bcp14>, because it ensures that its
        entries are automatically kept fresh and consistent with those of the
        connection tracking table of the DUT.
        </t>
        <t>The Responder can read its state table in various orders, for
        example:
        </t>
        <ul spacing="normal">
          <li>
            <t>pseudorandom</t>
          </li>
          <li>
            <t>round robin</t>
          </li>
        </ul>
        <t>Pseudorandom is <bcp14>RECOMMENDED</bcp14> to follow the approach
        of <xref target="RFC4814" format="default"/>.  Round robin may be used
        as a computationally cheaper alternative.
        </t>
      </section>
    </section>
    <section anchor="meas_scalability" numbered="true" toc="default">
      <name>Scalability Measurements</name>

<!--[rfced] May we clarify the singular/plural usage in this sentence as
follows?

Original:

   ...but it is RECOMMENDED to perform measurement series
   through which the value of one or more parameter(s) is/are changed to
   discover how the various values of the given parameter(s) influence
   the performance of the DUT.

Perhaps:

   ...but it is RECOMMENDED to perform measurement series
   through which the value of each parameter is changed to
   discover how the various values of each given parameter influence
   the performance of the DUT.

-->

      <t>As for scalability measurements, no new types of performance metrics
      are defined, but it is <bcp14>RECOMMENDED</bcp14> to perform measurement
      series through which the value of one or more parameters is
      changed to discover how the various values of the given parameters
      influence the performance of the DUT.
      </t>
      <section anchor="sc_net_flows" numbered="true" toc="default">
        <name>Scalability Against the Number of Network Flows</name>
        <t>The scalability measurements aim to quantify how the performance of
        the stateful NATxy gateways degrades with the increase of the number
        of network flows.</t>
        <t>As for the actual values for the number of network flows to be used
        during the measurement series, it is <bcp14>RECOMMENDED</bcp14> to use
        some representative values from the range of the potential number of
        network flows the DUT may be faced with during its intended usage.</t>
        <t>It is important how the given number of network flows are
        generated. The sizes of the ranges of the source and destination IP
        addresses and port numbers are essential parameters to be reported
        together with the results. Please also see <xref
        target="reporting_format" format="default"/> about the reporting
        format.</t>
        <t>If a single IP address pair is used, then it is
        <bcp14>RECOMMENDED</bcp14> to use:
        </t>
        <ul spacing="normal">
          <li>
            <t>a fixed, larger source port number range (e.g., a few times
            10,000) and</t>
          </li>
          <li>
            <t>a variable-size destination port number range (e.g., 10, 100,
            1,000, etc.), where its expedient granularity depends on the
            purpose.</t>
          </li>
        </ul>
      </section>
      <section anchor="sc_cpu_cores" numbered="true" toc="default">
        <name>Scalability Against the Number of CPU Cores</name>
        <t>Stateful NATxy gateways are often implemented in software that is
        not bound to a specific hardware but can be executed by commodity
        servers. To facilitate the comparison of their performance, it can be
        useful to determine:
        </t>
        <ul spacing="normal">
          <li>
            <t>the performance of the various implementations using a single
            core of a well-known CPU and</t>
          </li>
          <li>
            <t>the scale-up of the performance of the various implementations
            with the number of CPU cores.</t>
          </li>
        </ul>
        <t>If the number of the available CPU cores is a power of two, then it
        is <bcp14>RECOMMENDED</bcp14> to perform the tests with 1, 2, 4, 8,
        16, etc. number of active CPU cores of the DUT.</t>
      </section>
    </section>

    <section anchor="reporting_format" numbered="true" toc="default">
      <name>Reporting Format</name>
      <t>Measurements <bcp14>MUST</bcp14> be executed multiple times. The
      necessary number of repetitions to achieve statistically reliable
      results may depend on the consistent or scattered nature of the results.
      The report of the results <bcp14>MUST</bcp14> contain the number of
      repetitions of the measurements.  The median is <bcp14>RECOMMENDED</bcp14>
      as the summarizing function of the results complemented with the first
      percentile and the 99th percentile as indices of the dispersion of the
      results.  The average and standard deviation <bcp14>MAY</bcp14> also be
      reported.
      </t>
      <t>All parameters and settings that may influence the performance of the
      DUT <bcp14>MUST</bcp14> be reported. Some of them may be specific to the
      given NATxy gateway implementation, like the "hashsize" (hash table
      size) and "nf_conntrack_max" (number of connection tracking table
      entries) values for iptables or the limit of the number of states for
      OpenBSD PF (set by the "set limit states number" command in the pf.conf
      file).
      </t>

      <t keepWithNext="true"/>

      <table anchor="iptables-conn-scale" align="left">
        <name>Example Table of the Maximum Connection Establishment Rate of
        Iptables Against the Number of Sessions</name>
        <tbody>
          <tr>
            <td align="left">number of sessions (req.)</td>
            <td align="right">0.4M</td>
            <td align="right">4M</td>
            <td align="right">40M</td>
            <td align="right">400M</td>
          </tr>
          <tr>
            <td align="left">source port numbers (req.)</td>
            <td align="right">40,000</td>
            <td align="right">40,000</td>
            <td align="right">40,000</td>
            <td align="right">40,000</td>
          </tr>
          <tr>
            <td align="left">destination port numbers (req.)</td>
            <td align="right">10</td>
            <td align="right">100</td>
            <td align="right">1,000</td>
            <td align="right">10,000</td>
          </tr>
          <tr>
            <td align="left">"hashsize" (i.s.)</td>
            <td align="right">2<sup>17</sup></td>
            <td align="right">2<sup>20</sup></td>
            <td align="right">2<sup>23</sup></td>
            <td align="right">2<sup>27</sup></td>
          </tr>
          <tr>
            <td align="left">"nf_conntrack_max" (i.s.)</td>
            <td align="right">2<sup>20</sup></td>
            <td align="right">2<sup>23</sup></td>
            <td align="right">2<sup>26</sup></td>
            <td align="right">2<sup>30</sup></td>
          </tr>
          <tr>
            <td align="left">num. sessions / "hashsize" (i.s.)</td>
            <td align="right">3.05</td>
            <td align="right">3.81</td>
            <td align="right">4.77</td>
            <td align="right">2.98</td>
          </tr>
          <tr>
            <td align="left">number of experiments (req.)</td>
            <td align="right">10</td>
            <td align="right">10</td>
            <td align="right">10</td>
            <td align="right">10</td>
          </tr>
          <tr>
            <td align="left">error of binary search (req.)</td>
            <td align="right">1,000</td>
            <td align="right">1,000</td>
            <td align="right">1,000</td>
            <td align="right">1,000</td>
          </tr>
          <tr>
            <td align="left">connections/s median (req.)</td>
            <td></td>
            <td></td>
            <td></td>
            <td></td>
          </tr>
          <tr>
            <td align="left">connections/s 1st perc. (req.)</td>
            <td></td>
            <td></td>
            <td></td>
            <td></td>
          </tr>
          <tr>
            <td align="left">connections/s 99th perc. (req.)</td>
            <td></td>
            <td></td>
            <td></td>
            <td></td>
          </tr>
        </tbody>
      </table>

      <t keepWithPrevious="true"/>

      <t><xref target="iptables-conn-scale" format="default"/> shows an
      example of table headings for reporting the measurement results regarding
      the scalability of the iptables stateful NAT44 implementation against the
      number of sessions. The table indicates the always required fields
      (req.) and the implementation-specific ones (i.s.).  A computed value
      was also added in row 6; it is the number of sessions per hashsize
      ratio, which helps the reader to interpret the achieved maximum
      connection establishment rate.  (A lower value results in shorter linked
      lists hanging on the entries of the hash table, thus facilitating higher
      performance. The ratio is varying, because the number of sessions is
      always a power of 10, whereas the hash table size is a power of 2.)  To
      reflect the accuracy of the results, the table contains the value of the
      "error" of the binary search, which expresses the stopping criterion for
      the binary search. The binary search stops when the difference between
      the "higher limit" and "lower limit" of the binary search is less than
      or equal to the "error".
      </t>

      <t>The table <bcp14>MUST</bcp14> be complemented with reporting the
      relevant parameters of the DUT. If the DUT is a general-purpose computer
      and some software NATxy gateway implementation is tested, then the
      hardware description <bcp14>SHOULD</bcp14> include: the computer type,
      CPU type and number of active CPU cores, memory type, size and speed,
      network interface card type (also reflecting the speed), the fact that
      direct cable connections were used or the type of the switch used for
      interconnecting the Tester and the DUT. The operating system type and
      version, kernel version, and the version of the NATxy gateway
      implementation (including the last commit date and number if applicable)
      <bcp14>SHOULD</bcp14> also be given.
      </t>
    </section>

    <section anchor="impl_exp" numbered="true" toc="default">
      <name>Implementation and Experience</name>

      <t>The stateful extension of siitperf <xref target="SIITPERF"
      format="default"/> is an implementation of this concept.  Its first
      version that only supports multiple port numbers is documented in this
      (open access) paper: <xref target="LEN2022" format="default"/>.  Its
      extended version that also supports multiple IP addresses is documented in
      this (open access) paper: <xref target="LEN2024b" format="default"/>.
      </t>
      <t>The proposed benchmarking methodology has been validated by
      performing benchmarking measurements with three radically different
      stateful NAT64 implementations (Jool, tayga+iptables, and OpenBSD PF) in this
      (open access) paper: <xref target="LEN2023" format="default"/>.</t>

      <t>Further experience with this methodology of using siitperf for measuring
      the scalability of the iptables stateful NAT44 and Jool stateful NAT64
      implementations is described in <xref
      target="I-D.lencse-v6ops-transition-scalability" format="default"/>.</t>

      <t>This methodology was successfully applied for the benchmarking of
      various IPv4-as-a-Service (IPv4aaS) technologies without the usage of
      technology-specific Testers by reducing the aggregate of their Customer
      Edge (CE) and Provider Edge (PE) devices to a stateful NAT44 gateway
      documented in this (open access) paper: <xref target="LEN2024a"
      format="default"/>.</t>
    </section>

    <section anchor="udp_or_tcp" numbered="true" toc="default">
      <name>Limitations of Using UDP as a Transport Layer Protocol</name>

      <t>The test frame format defined in RFC 2544 <xref target="RFC2544"/>
      exclusively uses UDP (and not TCP) as a transport layer protocol.
      Testing with UDP was kept in both RFC 5180 <xref target="RFC5180"/> and
      RFC 8219 <xref target="RFC8219"/> regarding the standard benchmarking
      procedures (throughput, latency, frame loss rate, etc.).  The
      benchmarking methodology proposed in this document follows this
      long-established benchmarking tradition using UDP as a transport layer
      protocol, too. The rationale for this is that the standard benchmarking
      procedures require sending frames at arbitrary constant frame rates,
      which would violate the flow control and congestion control algorithms
      of the TCP protocol. TCP connection setup (using the three-way
      handshake) would further complicate testing.</t>

      <t>Further potential transport layer protocols, e.g., the Datagram
      Congestion Control Protocol (DCCP) <xref target="RFC4340"
      format="default"/> and the Stream Control Transmission Protocol (SCTP)
      <xref target="RFC9260" format="default"/>, are outside of the scope of
      this document, as the widely used stateful NAT44 and stateful NAT64
      implementations do not support them. Although QUIC <xref
      target="RFC9000" format="default"/> is also considered a transport layer
      protocol, QUIC packets are carried in UDP datagrams; thus, QUIC does
      not need special handling.</t>

      <t>Some stateful NATxy solutions handle TCP and UDP differently,
      e.g., iptables uses a 30s timeout for UDP and a 60s timeout for TCP. Thus,
      benchmarking results produced using UDP do not necessarily characterize
      the performance of a NATxy gateway well enough when they are used for
      forwarding Internet traffic. As for the given example, timeout values of
      the DUT may be adjusted, but it requires extra consideration.</t>

      <t>Other differences in handling UDP or TCP are also possible. Thus, the
      authors recommend that further investigations should be performed in
      this field.</t>

      <t>As a mitigation of this problem, this document recommends that
      testing with protocols using TCP (like HTTP and HTTPS up to version 2)
      can be performed as described in <xref target="RFC9411"
      format="default"/>.  This approach also solves the potential problem of
      protocol helpers that may be present in the stateful DUT.</t>

      <t>As for HTTP/3, it uses QUIC, which uses UDP as stated above. It
      should be noted that QUIC is treated as any other UDP payload. The
      proposed measurement method does not aim to measure the performance of
      QUIC; rather, it aims to measure the performance of the stateful NATxy
      gateway.</t>
    </section>

   <section anchor="Acknowledgements" title="Acknowledgements">
      <t>The authors would like to thank Al Morton, Sarah Banks, Edwin Cordeiro, Lukasz Bromirski,
	  Sándor Répás, Tamás Hetényi, Timothy Winters, Eduard Vasilenko, Minh Ngoc Tran, Paolo Volpato,
	  Zeqi Lai, and Bertalan Kovács for their comments.</t>
	  <t>The authors thank Warren Kumari, Michael Scharf, Alexey Melnikov, Robert Sparks, David Dong,
	  Roman Danyliw, Erik Kline, Murray Kucherawy, Zaheduzzaman Sarker, and Éric Vyncke
	  for their reviews and comments.</t>
	 <t>This work was supported by the Japan Trust International Research Cooperation Program
	 of the National Institute of Information and Communications Technology (NICT), Japan.</t>
   </section>

   <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" numbered="true" toc="default">
      <name>IANA Considerations</name>
      <t>This document has no IANA actions.</t>
    </section>

    <section anchor="Security" numbered="true" toc="default">
      <name>Security Considerations</name>
      <t>This document has no further security considerations beyond those of
      <xref target="RFC8219" format="default"/>.  They should be cited here so
      that they can be applied not only for the benchmarking of IPv6 transition
      technologies but also for the benchmarking of any stateful NATxy
      gateways (allowing for x=y, too).</t>
    </section>
  </middle>

 <!--  *****BACK MATTER ***** -->
 <back>

   <displayreference target="I-D.lencse-v6ops-transition-scalability" to="SCALABILITY"/>
   <references>
      <name>References</name>
      <references>
        <name>Normative References</name>

	<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.1918.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2544.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3022.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4340.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4814.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5180.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6146.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7599.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8219.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9000.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9260.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9411.xml"/>
      </references>
      <references>
        <name>Informative References</name>

<!-- References split into informative and normative -->

   <!-- There are 2 ways to insert reference entries from the citation libraries:
    1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
    2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
       (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

    Both are cited textually in the same manner: by using xref elements.
    If you use the PI option, xml2rfc will, by default, try to find included files in the same
    directory as the including file. You can also define the XML_LIBRARY environment variable
    with a value containing a set of directories to search.  These can be either in the local
    filing system or remote ones accessed by http (http://domain/dir/... ).-->

<!-- [I-D.lencse-v6ops-transition-scalability] IESG state: Expired as of 06/19/24 -->
	<xi:include href="https://datatracker.ietf.org/doc/bibxml3/draft-lencse-v6ops-transition-scalability.xml"/>

        <reference anchor="DUST1964" target="https://dl.acm.org/doi/pdf/10.1145/364520.364540">
          <front>
            <title>Algorithm 235: Random permutation
            </title>
            <author initials="R." surname="Durstenfeld">
              <organization/>
            </author>
            <date day="" month="July" year="1964"/>
          </front>
          <refcontent>Communications of the ACM, vol. 7, no. 7, p. 420</refcontent>
          <seriesInfo name="DOI" value="10.1145/364520.364540"/>
        </reference>

        <reference anchor="IIR2020" target="https://www.iij.ad.jp/en/dev/iir/pdf/iir_vol49_report_EN.pdf">
          <front>
            <title>Periodic Observation Report: Internet Trends as Seen from IIJ Infrastructure - 2020
            </title>
            <author initials="T." surname="Kurahashi">
              <organization/>
            </author>
            <author initials="Y." surname="Matsuzaki">
              <organization/>
            </author>
            <author initials="T." surname="Sasaki">
              <organization/>
            </author>
            <author initials="T." surname="Saito">
              <organization/>
            </author>
            <author initials="F." surname="Tsutsuji">
              <organization/>
            </author>
            <date day="" month="December" year="2020"/>
          </front>
          <refcontent>Internet Initiative Japan Inc.</refcontent>
          <refcontent>Internet Infrastructure Review, vol. 49</refcontent>
        </reference>

        <reference anchor="LEN2015" target="https://www.hit.bme.hu/~lencse/publications/e98-b_8_1580.pdf">
          <front>
            <title>Estimation of the Port Number Consumption of Web Browsing
            </title>
            <author initials="G." surname="Lencse">
              <organization/>
            </author>
            <date day="1" month="August" year="2015"/>
          </front>
          <refcontent>IEICE Transactions on Communications, vol. E98-B, no. 8, pp. 1580-1588</refcontent>
          <seriesInfo name="DOI" value="10.1587/transcom.E98.B.1580"/>
        </reference>

        <reference anchor="LEN2020" target="http://ijates.org/index.php/ijates/article/view/291">
          <front>
            <title>Adding RFC 4814 Random Port Feature to Siitperf: Design, Implementation and Performance Estimation
            </title>
            <author initials="G." surname="Lencse">
              <organization/>
            </author>
            <date day="" month="November" year="2020"/>
          </front>
          <refcontent>International Journal of Advances in Telecommunications, Electrotechnics, Signals and Systems, vol. 9, no. 3, pp. 18-26</refcontent>
          <seriesInfo name="DOI" value="10.11601/ijates.v9i3.291"/>
        </reference>

        <reference anchor="LEN2022" target="https://www.sciencedirect.com/science/article/pii/S0140366422001803">
          <front>
            <title>Design and Implementation of a Software Tester for Benchmarking Stateful NAT64xy Gateways: Theory and Practice of Extending Siitperf for Stateful Tests
            </title>
            <author initials="G." surname="Lencse">
              <organization/>
            </author>
            <date day="1" month="August" year="2022"/>
          </front>
          <refcontent>Computer Communications, vol. 192, pp. 75-88</refcontent>
          <seriesInfo name="DOI" value="10.1016/j.comcom.2022.05.028"/>
        </reference>

        <reference anchor="LEN2023" target="https://www.sciencedirect.com/science/article/pii/S0140366423002931">
          <front>
            <title>Benchmarking methodology for stateful NAT64 gateways
            </title>
            <author initials="G." surname="Lencse">
              <organization/>
            </author>
            <author initials="K." surname="Shima">
              <organization/>
            </author>
            <author initials="K." surname="Cho">
              <organization/>
            </author>
            <date day="1" month="October" year="2023"/>
          </front>
          <refcontent>Computer Communications, vol. 210, no. 1, pp. 256-272</refcontent>
          <seriesInfo name="DOI" value="10.1016/j.comcom.2023.08.009"/>
        </reference>

        <reference anchor="LEN2024a" target="https://www.sciencedirect.com/science/article/pii/S0140366424000999">
          <front>
            <title>Benchmarking methodology for IPv4aaS technologies:
            Comparison of the scalability of the Jool implementation of 464XLAT and MAP-T
            </title>
            <author initials="G." surname="Lencse">
              <organization/>
            </author>
            <author initials="Á." surname="Bazsó">
              <organization/>
            </author>
            <date day="1" month="April" year="2024"/>
          </front>
          <refcontent>Computer Communications, vol. 219, no. 1, pp. 243-258</refcontent>
          <seriesInfo name="DOI" value="10.1016/j.comcom.2024.03.007"/>
        </reference>

        <reference anchor="LEN2024b" target="https://www.sciencedirect.com/science/article/abs/pii/S0140366424001993">
          <front>
            <title>Making stateless and stateful network performance measurements unbiased
            </title>
            <author initials="G." surname="Lencse">
              <organization/>
            </author>
            <date month="September" year="2024"/>
          </front>
          <refcontent>Computer Communications, vol. 225, pp. 141-155</refcontent>
          <seriesInfo name="DOI" value="10.1016/j.comcom.2024.05.018"/>
        </reference>

        <reference anchor="SIITPERF" target="https://github.com/lencsegabor/siitperf">
          <front>
            <title>Siitperf: An RFC 8219 compliant SIIT and stateful NAT64/NAT44 tester written in C++ using DPDK
            </title>
            <author initials="G." surname="Lencse">
              <organization/>
            </author>
            <date day="" month="September" year="2023"/>
          </front>
          <refcontent>source code available from GitHub, commit 165cb7f</refcontent>
        </reference>
	<!-- 	-->
      </references>
    </references>

    <section anchor="change_log" title="Change Log">
    <section title="00">
      <t>Initial version.
      </t>
    </section>
    <section title="01">
      <t>Updates based on the comments received on the BMWG mailing list anchor="Acknowledgements" numbered="false" toc="default">
      <name>Acknowledgements</name>

      <t>The authors would like to thank <contact fullname="Al Morton"/>,
      <contact fullname="Sarah Banks"/>, <contact fullname="Edwin Cordeiro"/>,
      <contact fullname="Lukasz Bromirski"/>, <contact fullname="Sándor
      Répás"/>, <contact fullname="Tamás Hetényi"/>, <contact
      fullname="Timothy Winters"/>, <contact fullname="Eduard Vasilenko"/>,
      <contact fullname="Minh Ngoc Tran"/>, <contact fullname="Paolo
      Volpato"/>, <contact fullname="Zeqi Lai"/>, and <contact
      fullname="Bertalan Kovács"/> for their comments.</t>
      <t>The authors thank <contact fullname="Warren Kumari"/>, <contact
      fullname="Michael Scharf"/>, <contact fullname="Alexey Melnikov"/>,
      <contact fullname="Robert Sparks"/>, <contact fullname="David Dong"/>,
      <contact fullname="Roman Danyliw"/>, <contact fullname="Erik Kline"/>,
      <contact fullname="Murray Kucherawy"/>, <contact fullname="Zaheduzzaman
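      Sarker"/>, and <contact fullname="Éric Vyncke"/> for their reviews and comments.</t>
      <t>This work was supported by the Japan Trust International Research
      Cooperation Program of the National Institute of Information and
      Communications Technology (NICT), Japan.</t>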
    </section>
    <section title="02">
      <t><xref target="ctrl_conntrack"/>
      was completely re-written. As a consequence,
	  the occurrences of the now undefined "mostly different" source port number destination
	  port number combinations were deleted from <xref target="meas_max_conn_est_rate"/>,
	  too.
      </t>
    </section>
    <section title="03">
      <t>Added <xref target="consider_stateful"/> about the consideration of the
	  cases of stateful operation.
      </t>
      <t>Consistency checking. Removal of some parts obsoleted by the previous re-writing
	  of <xref target="ctrl_conntrack"/>.
      </t>
      <t>Added <xref target="meas_conn_tear_down_rate"/> about the method for measuring connection tear-down rate.
      </t>
      <t>Updates for <xref target="impl_exp"/> about the implementation and experience.
      </t>
    </section>
    <section title="04">
      <t>Update of the abstract.
      </t>
      <t>Added <xref target="validation_of_conn"/> about validation of connection establishment.
      </t>
      <t>Added <xref target="meas_contr_capacity"/> about the method for measuring connection tracking table capacity.
      </t>
      <t>Consistency checking and corrections.
      </t>
    </section>
    <section title="00 - WG item">
      <t>Added measurement setup for Stateful NAT64 gateways.
      </t>
      <t>Consistency checking and corrections.
      </t>
    </section>
    <section title="01">
      <t>Added Section 4.5.1 about typical types of measurement series and reporting format.
      </t>
    </section>
    <section title="02">
      <t>Added the usage of multiple IP addresses.</t>
      <t>Section 4.5.1 was removed and split into two Sections:
      <xref target="meas_scalability"/> about scalability measurements and
      <xref target="reporting_format"/> about reporting format.
      </t>
    </section>
    <section title="03">
      <t>Updated the usage of multiple IP addresses.</t>
      <t>Test phases were renamed as follows:
      <list style="symbols">
        <t>preliminary test phase --&gt; test phase 1</t>
        <t>real test phase --&gt; test phase 2.</t>
      </list>
      </t>
    </section>
    <section title="04">
      <t>Minor updates to <xref target="setup_term_multiple"/> and <xref target="impl_exp"/>.</t>
    </section>
    <section title="05">
      <t>Minor updates addressing WGLC nits (adding the definition of "black box",
      performing a high amount of grammatical corrections).</t>
    </section>
    <section title="06">
      <t>Language editing addressing preliminary AD review comments by eliminating the occurrences
      of first person singular ("we", "our").</t>
    </section>
    <section title="07">
      <t>Updates addressing IESG Last Call comments.</t>
    </section>
  </section>
  </back>

<!-- [rfced] FYI - We have added expansions for abbreviations upon first use
per Section 3.6 of RFC 7322 ("RFC Style Guide"). Please review each
expansion in the document carefully to ensure correctness.

Border Relay (BR)
Mapping of Address and Port using Translation (MAP-T)
Datagram Congestion Control Protocol (DCCP)
Stream Control Transmission Protocol (SCTP)

-->

<!-- [rfced] Please review the "Inclusive Language" portion of the online
Style Guide <https://www.rfc-editor.org/styleguide/part2/#inclusive_language>
and let us know if any changes are needed.  Updates of this nature typically
result in more precise language, which is helpful for readers.

a. For example, please consider whether "black" should be updated.

b. In addition, please consider whether "tradition" and "traditional" should
be updated for clarity. While the NIST website
<https://www.nist.gov/nist-research-library/nist-technical-series-publications-author-instructions#table1>
indicates that this term is potentially biased, it is also ambiguous.
"Tradition" is a subjective term, as it is not the same for everyone.

-->

</rfc>