diff --git a/parmys/parmys-plugin/core/multiplier.cc b/parmys/parmys-plugin/core/multiplier.cc index 3a411217f25..6291d6f01b4 100644 --- a/parmys/parmys-plugin/core/multiplier.cc +++ b/parmys/parmys-plugin/core/multiplier.cc @@ -937,7 +937,7 @@ void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b) *-----------------------------------------------------------------------*/ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist) { - nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addbig; + nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addsmall2, *addbig; int size; /* Check for a legitimate split */ @@ -976,50 +976,153 @@ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t * init_split_multiplier(node, a1b0, a0, a1, 0, b0, a1b1, a0b0); mult_list = insert_in_vptr_list(mult_list, a1b0); - /* New node for the initial add */ - addsmall = allocate_nnode(node->loc); - addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); - strcpy(addsmall->name, node->name); - strcat(addsmall->name, "-add0"); - // this addition will have a carry out in the worst case, add to input pins and connect then to gnd - init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1); - - /* New node for the BIG add */ - addbig = allocate_nnode(node->loc); - addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); - strcpy(addbig->name, node->name); - strcat(addbig->name, "-add1"); - init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins); - - // connect inputs to port a of addsmall - for (int i = 0; i < a1b0->num_output_pins; i++) - connect_nodes(a1b0, i, addsmall, i); - add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins); - // connect inputs to port b of addsmall - for (int i = 0; i < a0b1->num_output_pins; i++) - connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]); - 
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]); - - // connect inputs to port a of addbig - size = addsmall->num_output_pins; - for (int i = 0; i < size; i++) - connect_nodes(addsmall, i, addbig, i); - - // connect inputs to port b of addbig - for (int i = b0; i < a0b0->output_port_sizes[0]; i++) - connect_nodes(a0b0, i, addbig, i - b0 + size); - size = size + a0b0->output_port_sizes[0] - b0; - for (int i = 0; i < a1b1->output_port_sizes[0]; i++) - connect_nodes(a1b1, i, addbig, i + size); - - // remap the multiplier outputs coming directly from a0b0 - for (int i = 0; i < b0; i++) { - remap_pin_to_new_node(node->output_pins[i], a0b0, i); - } + // the balanced addition method only works if a0 and b0 are the same size + // (i.e. if the input ports on the hardware multiplier are equal) + if (b0 == a0) { + /* New node for the initial add */ + addsmall = allocate_nnode(node->loc); + addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addsmall->name, node->name); + strcat(addsmall->name, "-add0"); + // this addition will have a carry out in the worst case, add to input pins and connect them to gnd + init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1); + + // connect inputs to port a of addsmall + for (int i = 0; i < a1b0->num_output_pins; i++) + connect_nodes(a1b0, i, addsmall, i); + + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins); + // connect inputs to port b of addsmall + for (int i = 0; i < a0b1->num_output_pins; i++) + connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]); + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]); + + /* New node for the BIG add */ + addbig = allocate_nnode(node->loc); + addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addbig->name, node->name); + strcat(addbig->name, "-add1"); +
init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins); + // connect inputs to port a of addbig + size = addsmall->num_output_pins; + for (int i = 0; i < size; i++) + connect_nodes(addsmall, i, addbig, i); + + // connect inputs to port b of addbig + for (int i = b0; i < a0b0->output_port_sizes[0]; i++) + connect_nodes(a0b0, i, addbig, i - b0 + size); + size = size + a0b0->output_port_sizes[0] - b0; + for (int i = 0; i < a1b1->output_port_sizes[0]; i++) + connect_nodes(a1b1, i, addbig, i + size); + + // remap the multiplier outputs coming directly from a0b0 + for (int i = 0; i < b0; i++) { + remap_pin_to_new_node(node->output_pins[i], a0b0, i); + } + + // remap the multiplier outputs coming from addbig + for (int i = 0; i < addbig->num_output_pins; i++) { + remap_pin_to_new_node(node->output_pins[i + b0], addbig, i); + } + } else { + /* Expanding upon the description of the method used in this function. + If we have two numbers A and B and a hardware multiplier of size a0xb0, + we can split each of them into two parts: + A = (A1 << a0) + A0 + B = (B1 << b0) + B0 + where A1 and B1 are the high bits of A and B, and A0 and B0 are the low bits. + Note that len(A0) = a0 and len(B0) = b0 by definition. + The multiplication of A and B can be expressed as: + A * B = ((A1 << a0) + A0) * ((B1 << b0) + B0) + = {(A1 * B1) << (a0 + b0)} + {((A1 * B0) << a0) + ((A0 * B1) << b0)} + {A0 * B0} + We split the additions up like so: + addsmall = ((A1 * B0) << a0) + ((A0 * B1) << b0) // can have carry + addsmall2 = ((A1 * B1) << (a0 + b0)) + (A0 * B0) // will not have carry + addbig = addsmall + addsmall2 + This is the schoolbook four-partial-product split (similar in spirit to Karatsuba, but all four multiplications are kept).
+ */ + /////////////// Addsmall ///////////////////// + addsmall = allocate_nnode(node->loc); + addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addsmall->name, node->name); + strcat(addsmall->name, "-add0"); + init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + a0 + 1, a0b1->num_output_pins + b0 + 1); + + // The first a0 pins of addsmall input connecting to a1b0 are connected to zero + for (int i = 0; i < a0; i++) { + add_input_pin_to_node(addsmall, get_zero_pin(netlist), i); + } + + // connect inputs to port a of addsmall + for (int i = 0; i < a1b0->num_output_pins; i++) { + connect_nodes(a1b0, i, addsmall, i + a0); + } + + // add zero pin for carry + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins + a0); + + // The first b0 pins of addsmall input connecting to a0b1 are connected to zero + for (int i = 0; i < b0; i++) { + add_input_pin_to_node(addsmall, get_zero_pin(netlist), i + addsmall->input_port_sizes[0]); + } + + // connect inputs to port b of addsmall + for (int i = 0; i < a0b1->num_output_pins; i++) { + connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0] + b0); + } + + // add zero pin for carry + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0] + b0); + + /////////////// Addsmall2 ///////////////////// + addsmall2 = allocate_nnode(node->loc); + addsmall2->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addsmall2->name, node->name); + strcat(addsmall2->name, "-add1"); + init_multiplier_adder(addsmall2, a1b1, a1b1->num_output_pins + a0 + b0, a0b0->num_output_pins); - // remap the multiplier outputs coming from addbig - for (int i = 0; i < addbig->num_output_pins; i++) { - remap_pin_to_new_node(node->output_pins[i + b0], addbig, i); + // connect first a0+ b0 pins of addsmall2 to zero + for (int i = 0; i < a0 + b0; i++) { + add_input_pin_to_node(addsmall2, get_zero_pin(netlist), i); + } + + // connect inputs to 
port a of addsmall2 + for (int i = 0; i < a1b1->num_output_pins; i++) { + connect_nodes(a1b1, i, addsmall2, i + a0 + b0); + } + + // connect inputs to port b of addsmall2 + for (int i = 0; i < a0b0->output_port_sizes[0]; i++) { + connect_nodes(a0b0, i, addsmall2, i + addsmall2->input_port_sizes[0]); + } + + /////////////// Addbig ///////////////////// + addbig = allocate_nnode(node->loc); + addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addbig->name, node->name); + strcat(addbig->name, "-add2"); + init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, addsmall2->num_output_pins); + // Here the final addition can have a carry out in the worst case, however, + // our final product will always only be the length of the longest input port so regardless of the carry the + // final add's carry will always drop out. + + // connect inputs to port a of addbig + for (int i = 0; i < addsmall->num_output_pins; i++) { + connect_nodes(addsmall, i, addbig, i); + } + // add_input_pin_to_node(addbig, get_zero_pin(netlist), addsmall->num_output_pins); + + // connect inputs to port b of addbig + for (int i = 0; i < addsmall2->num_output_pins; i++) { + connect_nodes(addsmall2, i, addbig, i + addbig->input_port_sizes[0]); + } + // add_input_pin_to_node(addbig, get_zero_pin(netlist), addbig->input_port_sizes[0] + addsmall->num_output_pins); + + // remap all of the multiplier outputs so that they come from addbig + for (int i = 0; i < addbig->num_output_pins; i++) { + remap_pin_to_new_node(node->output_pins[i], addbig, i); + } } // CLEAN UP @@ -1060,7 +1163,6 @@ void split_multiplier_a(nnode_t *node, int a0, int a1, int b) strcat(a0b->name, "-0"); init_split_multiplier(node, a0b, 0, a0, 0, b, nullptr, nullptr); mult_list = insert_in_vptr_list(mult_list, a0b); - /* New node for a1b multiply */ a1b = allocate_nnode(node->loc); a1b->name = (char *)vtr::malloc(strlen(node->name) + 3); @@ -1184,7 +1286,6 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist)
oassert(node->type == MULTIPLY); oassert(hard_multipliers != NULL); - sizea = node->input_port_sizes[0]; sizeb = node->input_port_sizes[1]; sizeout = node->output_port_sizes[0]; @@ -1199,6 +1300,13 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist) } diffa = ina - sizea; diffb = inb - sizeb; + // the multiplier size may fall between the two unequal hard block port sizes (e.g. mul_size > 18 && mul_size < 25); if port b would need negative padding, swap ina and inb and recompute the padding + if (diffb < 0) { + std::swap(ina, inb); + diffa = ina - sizea; + diffb = inb - sizeb; + } + diffout = hard_multipliers->outputs->size - sizeout; if (configuration.split_hard_multiplier == 1) { @@ -1281,11 +1389,10 @@ void iterate_multipliers(netlist_t *netlist) int mula, mulb; int a0, a1, b0, b1; nnode_t *node; - /* Can only perform the optimisation if hard multipliers exist! */ if (hard_multipliers == NULL) return; - + // std::cin.get(); sizea = hard_multipliers->inputs->size; sizeb = hard_multipliers->inputs->next->size; if (sizea < sizeb) { @@ -1313,7 +1420,6 @@ void iterate_multipliers(netlist_t *netlist) sizea = sizeb; sizeb = swap; } - /* Do I need to split the multiplier on both inputs?
*/ if ((mula > sizea) && (mulb > sizeb)) { a0 = sizea; @@ -1890,4 +1996,4 @@ void free_multipliers() hard_multipliers->instances = NULL; } -} +} \ No newline at end of file diff --git a/parmys/parmys-plugin/netlist/netlist_utils.cc b/parmys/parmys-plugin/netlist/netlist_utils.cc index 9c3fb060b4f..60fc7025f04 100644 --- a/parmys/parmys-plugin/netlist/netlist_utils.cc +++ b/parmys/parmys-plugin/netlist/netlist_utils.cc @@ -485,6 +485,7 @@ void remap_pin_to_new_net(npin_t *pin, nnet_t *new_net) *-----------------------------------------------------------------------*/ void remap_pin_to_new_node(npin_t *pin, nnode_t *new_node, int pin_idx) { + oassert(pin != NULL); if (pin->type == INPUT) { /* clean out the entry in the old net */ pin->node->input_pins[pin->pin_node_idx] = NULL; diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt index d1b5a73eb72..956f23e6634 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt @@ -12,6 +12,7 @@ circuits_dir=benchmarks/verilog arch_list_add=7series_BRAM_DSP_carry.xml # Add circuits to list to sweep +circuit_list_add=mcml.v circuit_list_add=LU32PEEng.v circuit_list_add=LU8PEEng.v circuit_list_add=bgm.v diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt index da5393b0e1c..788ced44c7a 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt @@ -1,7 +1,8 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time 
max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time -7series_BRAM_DSP_carry.xml LU32PEEng.v common 8462.06 vpr 3.48 GiB -1 -1 129.11 1222556 93 791.19 -1 -1 380736 -1 -1 -1 114 153 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 
/home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 3646292 114 102 123003 108818 1 77498 10464 113 113 12769 CLB auto 1072.1 MiB 4396.87 7.43631e+06 1262567 15340198 6033356 8730977 575865 2684.0 MiB 2280.76 12.98 135.884 109.205 -232244 -109.205 109.205 0.16 0.360282 0.320818 54.09 46.7631 -1 -1 -1 -1 1465145 11.9176 653098 5.31233 473500 2451803 465565033 75467213 7.58182e+08 6.15544e+08 2.46355e+08 19293.2 19 5806063 87504646 2231668 86.4379 86.4379 -454383 -86.4379 0 0 128.89 59.66 17.27 3560.6 MiB 384.96 68.2088 58.22 2684.0 MiB 0.16 263.36 -7series_BRAM_DSP_carry.xml LU8PEEng.v common 1748.11 vpr 1.03 GiB -1 -1 43.47 387956 98 87.95 -1 -1 117496 -1 -1 -1 114 45 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 1076724 114 102 36473 32106 1 22515 3201 62 62 3844 CLB auto 334.7 MiB 1220.93 1.18078e+06 296971 2644273 850518 1614737 179018 814.9 MiB 219.67 2.12 125.894 111.93 -52668.6 -111.93 111.93 0.05 0.0878002 0.0782625 11.1992 9.49294 -1 -1 -1 -1 347231 9.52753 164732 4.52002 139504 610360 78202730 8509396 2.21078e+08 1.79004e+08 7.33801e+07 19089.5 17 1711877 25804105 642295 88.66 88.66 -80517 -88.66 0 0 36.95 16.76 4.47 1051.3 MiB 59.59 14.4391 12.1487 814.9 MiB 0.15 56.68 -7series_BRAM_DSP_carry.xml bgm.v common 1453.26 vpr 1.09 GiB -1 -1 34.02 505276 14 87.55 -1 -1 124216 -1 -1 -1 257 0 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 1141844 257 32 36772 33710 1 23560 3586 66 66 4356 CLB auto 366.4 MiB 895.18 1.43373e+06 291085 2774622 943137 1789486 41999 912.5 MiB 219.36 2.13 38.4025 22.1265 -27810.4 -22.1265 22.1265 0.05 0.0889869 0.0799615 10.9486 9.40508 -1 -1 -1 -1 410749 11.1711 193767 5.26985 171335 
749781 115947662 12762222 2.52497e+08 1.8145e+08 8.29171e+07 19035.1 18 1855857 29118461 737145 20.7189 20.7189 -28709.3 -20.7189 0 0 41.79 19.45 5.47 1108.0 MiB 80.62 14.4948 12.3194 912.5 MiB 0.15 67.83 -7series_BRAM_DSP_carry.xml stereovision0.v common 631.20 vpr 478.05 MiB -1 -1 8.43 103696 5 7.86 -1 -1 69424 -1 -1 -1 169 0 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 489528 169 197 23125 21329 1 9476 1664 42 42 1764 CLB auto 184.2 MiB 521.98 200804 67090 888704 241514 572204 74986 403.1 MiB 25.54 0.26 6.25602 4.24154 -16239.3 -4.24154 4.24154 0.02 0.0285299 0.0249303 2.90125 2.47762 -1 -1 -1 -1 64191 2.77799 33017 1.42887 50936 110793 12919651 1591963 9.88618e+07 6.99558e+07 3.28179e+07 18604.2 11 823277 11448038 269515 3.62506 3.62506 -19319.1 -3.62506 0 0 16.19 7.33 2.13 478.1 MiB 16.34 3.63149 3.10496 403.1 MiB 0.15 26.15 -7series_BRAM_DSP_carry.xml stereovision1.v common 657.92 vpr 836.50 MiB -1 -1 7.17 118048 3 11.50 -1 -1 72280 -1 -1 -1 115 0 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 856580 115 145 22828 19265 1 10325 1531 66 66 4356 DSP auto 179.1 MiB 458.64 311225 103708 909991 287175 612059 10757 778.3 MiB 24.00 0.24 5.73398 5.10375 -20985.5 -5.10375 5.10375 0.05 0.0277094 0.0241873 3.12136 2.68518 -1 -1 -1 -1 100891 4.42136 40545 1.77681 54616 114442 15706832 1926588 2.52497e+08 9.58701e+07 8.29171e+07 19035.1 11 1520183 28962658 248173 4.70403 4.70403 -25526.2 -4.70403 0 0 41.56 14.80 2.43 836.5 MiB 33.01 3.84255 3.31109 778.3 MiB 0.15 72.12 -7series_BRAM_DSP_carry.xml stereovision2.v common 2311.65 vpr 2.05 GiB -1 -1 10.93 180464 3 5.19 -1 -1 140368 -1 -1 -1 149 0 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 
GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 2144512 149 182 55133 36882 1 33040 4000 106 106 11236 DSP auto 329.2 MiB 1607.37 1.70601e+06 435687 3463551 1091033 2221855 150663 1922.9 MiB 232.94 1.79 22.4663 15.6766 -47524.7 -15.6766 15.6766 0.14 0.0743063 0.066452 9.08666 7.81491 -1 -1 -1 -1 417311 7.57137 134875 2.44707 129629 213413 54356800 6676827 6.67318e+08 2.77775e+08 2.17352e+08 19344.2 15 3832009 76669346 592051 15.1625 15.1625 -61375.1 -15.1625 0 0 116.92 41.18 6.35 2094.2 MiB 105.70 11.7825 10.0862 1922.9 MiB 0.15 210.17 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time 
min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +7series_BRAM_DSP_carry.xml mcml.v common 6028.93 vpr 4.29 GiB -1 -1 112.39 1324532 26 1575.37 -1 -1 397412 -1 -1 -1 36 159 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 4493412 36 356 199703 166091 1 96432 14448 132 132 17424 CLB auto 1373.1 MiB 1554.60 7.30767e+06 1510965 30795198 11005989 11799313 7989896 3575.2 MiB 2353.07 8.99 114.688 75.4926 -385876 -75.4926 75.4926 0.11 0.144559 0.12413 27.5533 22.7998 -1 -1 -1 -1 -1 1044733 17 1.04106e+09 8.39563e+08 3.37280e+08 19357.2 132.18 32.8963 27.1671 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +7series_BRAM_DSP_carry.xml LU32PEEng.v common 4021.40 vpr 3.52 GiB -1 -1 89.23 1523944 97 420.80 -1 -1 364304 -1 -1 -1 114 153 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 3686084 114 102 123962 109723 1 78255 10604 114 114 12996 CLB auto 1078.7 MiB 2226.89 7.50175e+06 1274738 15614502 6135197 9031079 
448226 2727.3 MiB 917.29 6.53 143.442 109.472 -246691 -109.472 109.472 0.08 0.141279 0.121064 21.4205 17.832 -1 -1 -1 -1 -1 1465950 19 7.77041e+08 6.23088e+08 2.51453e+08 19348.5 154.91 27.165 22.4874 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +7series_BRAM_DSP_carry.xml LU8PEEng.v common 866.13 vpr 1.03 GiB -1 -1 28.05 472768 98 39.42 -1 -1 117816 -1 -1 -1 114 45 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 1079760 114 102 36706 32285 1 22732 3211 62 62 3844 CLB auto 336.8 MiB 618.57 1.07733e+06 293198 2726296 910892 1594196 221208 816.6 MiB 93.59 0.82 135.375 111.358 -52204.1 -111.358 111.358 0.02 0.0342582 0.0290691 4.52154 3.74248 -1 -1 -1 -1 -1 349285 16 2.21078e+08 1.79543e+08 7.33801e+07 19089.5 31.47 5.71966 4.71057 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +7series_BRAM_DSP_carry.xml bgm.v common 738.51 vpr 1.09 GiB -1 -1 23.62 654168 14 38.57 -1 -1 124128 -1 -1 -1 257 0 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 1147576 257 32 37283 34221 1 23859 3594 66 66 4356 CLB auto 369.5 MiB 486.01 1.42717e+06 292842 2727876 904302 1772661 50913 914.3 MiB 89.88 0.92 36.395 23.2057 -27798.9 -23.2057 23.2057 0.03 0.0420198 0.0365039 4.49405 3.75685 -1 -1 -1 -1 -1 405172 17 2.52497e+08 1.81881e+08 8.29171e+07 19035.1 37.75 5.87139 4.87957 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +7series_BRAM_DSP_carry.xml stereovision0.v common 305.97 vpr 477.68 MiB -1 -1 4.47 103400 5 2.93 -1 -1 70128 -1 -1 -1 169 0 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on 
Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 489148 169 197 23225 21365 1 9660 1645 42 42 1764 CLB auto 182.7 MiB 257.68 191242 69741 865479 231951 560905 72623 401.9 MiB 11.92 0.12 6.0701 4.03928 -16398 -4.03928 4.03928 0.01 0.0109682 0.00913863 1.16208 0.959963 -1 -1 -1 -1 -1 65287 12 9.88618e+07 6.89317e+07 3.28179e+07 18604.2 7.69 1.47609 1.21957 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +7series_BRAM_DSP_carry.xml stereovision1.v common 329.87 vpr 834.59 MiB -1 -1 3.84 114924 3 4.76 -1 -1 71964 -1 -1 -1 115 0 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 854616 115 145 22865 19302 1 10319 1528 66 66 4356 DSP auto 177.7 MiB 240.81 312166 102042 872431 274471 588095 9865 776.7 MiB 10.52 0.10 5.768 4.76067 -21233.7 -4.76067 4.76067 0.03 0.0112499 0.00937755 1.28065 1.06586 -1 -1 -1 -1 -1 102221 14 2.52497e+08 9.57084e+07 8.29171e+07 19035.1 15.90 1.64146 1.36816 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +7series_BRAM_DSP_carry.xml stereovision2.v common 1075.22 vpr 2.05 GiB -1 -1 5.50 172452 3 2.26 -1 -1 140992 -1 -1 -1 149 0 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 2146896 149 182 55416 37075 1 33280 4031 106 106 11236 DSP auto 331.5 MiB 770.59 1.74668e+06 450674 3595535 1133232 2334220 128083 1925.1 MiB 87.23 0.75 22.9595 15.3724 -48322.5 -15.3724 15.3724 0.07 0.0319348 0.027826 3.8538 3.21492 -1 -1 -1 -1 -1 436136 16 6.67318e+08 2.79446e+08 2.17352e+08 19344.2 53.10 4.97283 4.15748 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 
-1 -1 -1 -1 -1 -1 -1 \ No newline at end of file diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt index 67ebcf3f6cb..ccc1aea6050 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt @@ -6,6 +6,7 @@ arch_list_add=7series_BRAM_DSP_carry.xml # Add circuits to list to sweep circuit_list_add=stereovision3.v +circuit_list_add=diffeq2.v # Parse info and how to parse diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt index f1c30f92f81..c5f006aa489 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt @@ -1,2 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est 
placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -7series_BRAM_DSP_carry.xml stereovision3.v common 3.90 vpr 71.88 MiB -1 -1 0.31 24572 5 0.11 -1 -1 32512 -1 -1 -1 11 0 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 73604 11 2 303 283 2 114 35 7 7 49 CLB auto 32.5 MiB 1.52 569.025 404 890 113 545 232 71.9 MiB 0.02 0.00 3.1717 3.1717 -181.811 -3.1717 2.89952 0.16 0.000406774 0.000362597 0.00963025 0.00880151 -1 -1 -1 -1 44 491 11 1.34735e+06 1.18567e+06 177202. 3616.36 0.69 0.0981787 0.0839421 6848 92556 -1 373 9 263 823 105258 48434 2.83816 2.83244 -218.271 -2.83816 -2.452 -0.04 257836. 
5261.96 0.04 0.03 0.07 -1 -1 0.04 0.0161707 0.0149638 +7series_BRAM_DSP_carry.xml stereovision3.v common 2.53 vpr 72.59 MiB -1 -1 0.33 26408 4 0.08 -1 -1 36120 -1 -1 -1 11 0 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 74336 11 2 303 283 2 114 35 7 7 49 CLB auto 33.2 MiB 1.18 577.007 408 947 108 584 255 72.6 MiB 0.01 0.00 3.1717 3.1717 -180.982 -3.1717 2.89952 0.12 0.000171885 0.000143559 0.00458104 0.00401289 -1 -1 -1 -1 40 889 22 1.34735e+06 1.18567e+06 152291. 3107.98 0.22 0.0179847 0.0160687 6668 73471 -1 385 12 297 988 127228 60728 2.91111 2.8252 -221.503 -2.91111 -2.452 -0.04 215465. 4397.25 0.02 0.02 0.04 -1 -1 0.02 0.00962131 0.00895172 +7series_BRAM_DSP_carry.xml diffeq2.v common 48.98 vpr 129.37 MiB -1 -1 0.19 27844 5 0.09 -1 -1 38944 -1 -1 -1 66 0 -1 success v8.0.0-13067-gda604502c-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-06-16T13:26:41 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 132476 66 96 1819 1080 1 1150 336 26 26 676 DSP auto 40.6 MiB 2.77 20198.7 9188 64521 14520 45607 4394 119.7 MiB 0.65 0.01 22.6842 19.668 -1065.49 -19.668 19.668 3.45 0.000811128 0.000723384 0.0536543 0.0478746 -1 -1 -1 -1 74 12093 17 3.53732e+07 1.31407e+07 5.36197e+06 7931.91 33.65 0.395666 0.356239 133518 2720184 -1 10986 14 5828 10018 3165525 816384 19.4143 19.4143 -1210.49 -19.4143 -1.7 -0.034 6.54552e+06 9682.72 1.89 0.38 1.55 -1 -1 1.89 0.0458825 0.0428936 \ No newline at end of file