Start with the data:
> dput(Data1) structure(list(X1 = structure(c(17L, 14L, 20L, 16L, 1L, 2L, 3L, 4L, 15L, 8L, 9L, 10L, 11L, 12L, 13L, 21L, 22L, 23L, 18L, 19L, 5L, 6L, 7L), .Label = c("Astra_1", "Astra_2", "Astra_3", "Astra_4", "Audi_1", "Audi_2", "Audi_3", "BMW_1", "BMW_2", "BMW_3", "BMW_4", "BMW_5", "Fiat_1", "Mazda_2", "Mercedes_1", "Nexia_1", "Porsche_1", "Scania_1", "Scania_2", "Tico_1", "VW_1", "VW_2", "VW_3"), class = "factor"), X2 = structure(c(2L, 3L, 10L, 7L, 8L, 12L, 9L, 14L, 11L, 4L, 5L, 6L, 15L, 13L, 4L, 5L, 9L, 14L, 11L, 1L, 3L, 10L, 16L), .Label = c("Astra_1", "Astra_3", "Astra_4", "Audi_1", "Audi_2", "Audi_3", "BMW_1", "BMW_2", "Mazda_2", "Mercedes_1", "Nexia_1", "Porsche_1", "Scania_2", "Tico_1", "VW_2", "VW_3" ), class = "factor"), AUC_1 = c(5860133.702, 1296009.939, 333123.4932, 250348.9407, 1376193.334, 4080502.863, 3777603.233, 3503973.487, 99101538.62, 231873.8462, 87258.75465, 147430.9913, 1028986.892, 1451482.832, 8136.72382, 25311.41683, 131352.7137, 565410.8186, 30196.23792, 70184.82268, 2526321.019, 381643.2138, 819687.9824), AUC_2 = c(4849720.322, 928980.4715, 320547.6185, 223287.2029, 1340641.323, 4720329.699, 4369150.434, 3371021.243, 108591253.3, 266489.7601, 85384.84604, 165726.7626, 1052130.559, 1470876.65, 9499.927679, 49309.74984, 138482.765, 444600.7911, 25132.73714, 55453.67019, 2038911.81, 422559.3293, 1445477.433 ), ratio = c(1.20834467, 1.395088463, 1.03923247, 1.121196994, 1.02651866, 0.864452935, 0.864608186, 1.039439753, 0.91261069, 0.87010415, 1.021946618, 0.889602795, 0.978003046, 0.98681479, 0.856503765, 0.513314647, 0.948513078, 1.271726974, 1.201470327, 1.265647926, 1.2390536, 0.90317072, 0.567070757), Country = structure(c(1L, 1L, 2L, 3L, 5L, 1L, 5L, 1L, 4L, 7L, 4L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 8L, 8L, 6L, 6L, 7L), .Label = c("France", "Germany", "Italy", "Norway", "Poland", "Spain", "Sweden", "Ukraine" ), class = "factor"), Comp = structure(c(3L, 5L, 16L, 9L, 8L, 9L, 12L, 14L, 4L, 15L, 11L, 14L, 16L, 17L, 10L, 10L, 12L, 13L, 1L, 2L, 5L, 
6L, 7L), .Label = c("11,12", "12,13", "12,13,14", "14,15", "14,15,16", "15,16,17", "16,17,18", "2,3", "2,3,4", "3,4", "3,4,5", "4,5,6", "5,6", "5,6,7", "5,6,7,8", "6,7,8", "7,8,9"), class = "factor")), .Names = c("X1", "X2", "AUC_1", "AUC_2", "ratio", "Country", "Comp"), class = "data.frame", row.names = c(NA, -23L))
The first rows of the data frame look like this:
         X1         X2     AUC_1     AUC_2     ratio Country     Comp
1 Porsche_1    Astra_3 5860133.7 4849720.3 1.2083447  France 12,13,14
2   Mazda_2    Astra_4 1296009.9  928980.5 1.3950885  France 14,15,16
3    Tico_1 Mercedes_1  333123.5  320547.6 1.0392325 Germany    6,7,8
4   Nexia_1      BMW_1  250348.9  223287.2 1.1211970   Italy    2,3,4
5   Astra_1      BMW_2 1376193.3 1340641.3 1.0265187  Poland      2,3
6   Astra_2  Porsche_1 4080502.9 4720329.7 0.8644529  France    2,3,4
Now let's focus on the last two columns: Country and Comp. I would like to group the rows by country and, within each country, check whether any of the numbers in the Comp column are shared between rows. For rows that share at least one number, the names from X1 and X2 should be stored together - perhaps in separate vectors or in a matrix. A single row may belong to more than one "cluster" / "vector".
Here is an example of the desired output. It is only an illustration: the clustering shown is completely random. Any output format is acceptable.
  Country          1        2        3      4    5  6
1  France    Astra_3 Scania_2   Tico_1     NA   NA NA
2  Poland    Astra_4  Mazda_2     VW_3 Tico_2   NA NA
3  Sweden Mercedes_1    BMW_1    BMW_2 Audi_1 VW_3 NA
4  Norway      BMW_1  Astra_1 Scania_2 Audi_3   NA NA