From b3f1171561db2164d2b48637f7cd2be02f1b7ab8 Mon Sep 17 00:00:00 2001
From: Michael Hoennig <michael.hoennig@hostsharing.net>
Date: Fri, 3 Jan 2025 09:49:20 +0100
Subject: [PATCH] merge identical (ignoring case) persons during import (#142)

Co-authored-by: Michael Hoennig <michael@hoennig.de>
Reviewed-on: https://dev.hostsharing.net/hostsharing/hs.hsadmin.ng/pulls/142
Reviewed-by: Timotheus Pokorra <timotheus.pokorra@hostsharing.net>
---
 .../hs/migration/BaseOfficeDataImport.java    | 48 +++++++++++++++----
 .../hsadminng/hs/migration/CsvDataImport.java |  4 +-
 .../resources/migration/office/contacts.csv   |  5 +-
 3 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/src/test/java/net/hostsharing/hsadminng/hs/migration/BaseOfficeDataImport.java b/src/test/java/net/hostsharing/hsadminng/hs/migration/BaseOfficeDataImport.java
index da16ac35..da89e268 100644
--- a/src/test/java/net/hostsharing/hsadminng/hs/migration/BaseOfficeDataImport.java
+++ b/src/test/java/net/hostsharing/hsadminng/hs/migration/BaseOfficeDataImport.java
@@ -80,6 +80,8 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
         -1, HsOfficePersonType.LEGAL_PERSON
     );
 
+    static Map<String, HsOfficePersonRealEntity> distinctPersons = new WriteOnceMap<>(); // keyed by the lower-cased person key built in distinctPerson(...)
+
     static Map<Integer, HsOfficeContactRealEntity> contacts = new WriteOnceMap<>();
     static Map<Integer, HsOfficePersonRealEntity> persons = new WriteOnceMap<>();
     static Map<Integer, HsOfficePartnerEntity> partners = new WriteOnceMap<>();
@@ -225,7 +227,8 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
                    90590=contact(caption='Herr Inhaber R. Wiese, Das Perfekte Haus', emailAddresses='{ "main": "515217@kkemail.example.org"}'),
                    90629=contact(caption='Ragnar Richter', emailAddresses='{ "main": "mail@ragnar-richter..example.org"}'),
                    90677=contact(caption='Eike Henning', emailAddresses='{ "main": "hostsharing@eike-henning..example.org"}'),
-                   90698=contact(caption='Jan Henning', emailAddresses='{ "main": "mail@jan-henning.example.org"}')
+                   90698=contact(caption='Jan Henning', emailAddresses='{ "main": "mail@jan-henning.example.org"}'),
+                   90699=contact(caption='Jan Henning', emailAddresses='{"main": "lists@jan-henning.example.org"}')
                 }
                 """);
         assertThat(toJsonFormattedString(persons)).isEqualToIgnoringWhitespace("""
@@ -247,7 +250,8 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
                    90590=person(personType='??', tradeName='Das Perfekte Haus', salutation='Herr', familyName='Wiese', givenName='Inhaber R.'),
                    90629=person(personType='NP', familyName='Richter', givenName='Ragnar'),
                    90677=person(personType='NP', familyName='Henning', givenName='Eike'),
-                   90698=person(personType='NP', familyName='Henning', givenName='Jan')
+                   90698=person(personType='NP', familyName='Henning', givenName='Jan'),
+                   90699=person(personType='NP', familyName='Henning', givenName='Jan')
                 }
                 """);
         assertThat(toJsonFormattedString(debitors)).isEqualToIgnoringWhitespace("""
@@ -349,7 +353,8 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
                    2000071=rel(anchor='?? Ragnar IT-Beratung', type='SUBSCRIBER', mark='operations-discussion', holder='NP Henning, Eike', contact='Eike Henning'),
                    2000072=rel(anchor='?? Ragnar IT-Beratung', type='SUBSCRIBER', mark='operations-announce', holder='NP Henning, Eike', contact='Eike Henning'),
                    2000073=rel(anchor='?? Ragnar IT-Beratung', type='OPERATIONS_ALERT', holder='NP Henning, Jan', contact='Jan Henning'),
-                   2000074=rel(anchor='?? Ragnar IT-Beratung', type='OPERATIONS', holder='NP Henning, Jan', contact='Jan Henning')
+                   2000074=rel(anchor='?? Ragnar IT-Beratung', type='OPERATIONS', holder='NP Henning, Jan', contact='Jan Henning'),
+                   2000075=rel(anchor='?? Ragnar IT-Beratung', type='SUBSCRIBER', mark='operations-announce', holder='NP Henning, Jan', contact='Jan Henning')
                 }
                 """);
     }
@@ -633,8 +638,6 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
         jpaAttempt.transacted(() -> {
             context(rbacSuperuser);
             persons.forEach(this::persist);
-            relations.forEach((id, rel) -> this.persist(id, rel.getAnchor()));
-            relations.forEach((id, rel) -> this.persist(id, rel.getHolder()));
         }).assertSuccessful();
 
         jpaAttempt.transacted(() -> {
@@ -993,7 +996,7 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
 
                     final var partnerPerson = partner.getPartnerRel().getHolder();
                     if (containsPartnerRel(rec)) {
-                        addPerson(partnerPerson, rec);
+                        partner.getPartnerRel().setHolder(addPerson(partnerPerson, rec));
                     }
 
                     HsOfficePersonRealEntity contactPerson = partnerPerson;
@@ -1044,10 +1047,18 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
                     verifyContainsOnlyKnownRoles(rec.getString("roles"));
                 });
 
+        distinctAnchorAndHolderPersons();
         assertNoMissingContractualRelations();
         useHostsharingAsPartnerAnchor();
     }
 
+    private void distinctAnchorAndHolderPersons() { // re-points anchor and holder of every relation to the instance returned by distinctPerson(...)
+        for (final var relation : relations.values()) {
+            relation.setAnchor(distinctPerson(relation.getAnchor()));
+            relation.setHolder(distinctPerson(relation.getHolder()));
+        }
+    }
+
     private static void assertNoMissingContractualRelations() {
         final var contractualMissing = new HashSet<Integer>();
         partners.forEach((id, partner) -> {
@@ -1107,13 +1118,34 @@ public abstract class BaseOfficeDataImport extends CsvDataImport {
         person.setTradeName(contactRecord.getString("firma"));
         person.setPersonType(determinePersonType(contactRecord));
 
-        persons.put(contactRecord.getInteger("contact_id"), person);
+        persons.put(contactRecord.getInteger("contact_id"), distinctPerson(person));
         return person;
     }
 
+    // Makes sure that for identical person data (ignoring case) always the same HsOfficePersonRealEntity instance is used.
+    // Duplicates can come from multiple legacy contacts with different contact data for the same person in different contexts.
+    private HsOfficePersonRealEntity distinctPerson(final HsOfficePersonRealEntity person) {
+        if (person == null) {
+            return null;
+        }
+
+        final var personKey = (
+                person.getPersonType() + "|" +
+                person.getSalutation() + "|" +
+                person.getTradeName() + "|" +
+                person.getTitle() + "|" +
+                person.getGivenName() + "|" +
+                person.getFamilyName()
+        ).toLowerCase(java.util.Locale.ROOT); // locale-independent, so merging does not depend on the JVM's default locale
+
+        if ( !distinctPersons.containsKey(personKey) ) {
+            distinctPersons.put(personKey, person);
+        }
+        return distinctPersons.get(personKey);
+    }
+
     private static HsOfficePersonType determinePersonType(final Record contactRecord) {
         String roles = contactRecord.getString("roles");
-        String country = contactRecord.getString("country");
         String familyName = contactRecord.getString("last_name");
         String givenName = contactRecord.getString("first_name");
         String tradeName = contactRecord.getString("firma");
diff --git a/src/test/java/net/hostsharing/hsadminng/hs/migration/CsvDataImport.java b/src/test/java/net/hostsharing/hsadminng/hs/migration/CsvDataImport.java
index 8cfe04d7..2006a470 100644
--- a/src/test/java/net/hostsharing/hsadminng/hs/migration/CsvDataImport.java
+++ b/src/test/java/net/hostsharing/hsadminng/hs/migration/CsvDataImport.java
@@ -171,7 +171,9 @@ public class CsvDataImport extends ContextBasedTest {
 
     public <T extends BaseEntity> T persistViaEM(final Integer id, final T entity) {
         //System.out.println("persisting #" + entity.hashCode() + ": " + entity);
-        em.persist(entity);
+        if (!em.contains(entity)) {
+            em.persist(entity);
+        }
         // uncomment for debugging purposes
         // try {
         //     em.flush(); // makes it slow, but produces better error messages
diff --git a/src/test/resources/migration/office/contacts.csv b/src/test/resources/migration/office/contacts.csv
index eb58efae..f447a22a 100644
--- a/src/test/resources/migration/office/contacts.csv
+++ b/src/test/resources/migration/office/contacts.csv
@@ -31,5 +31,8 @@ contact_id;	bp_id;	salut;	first_name;	last_name;	title;	firma;	co;	street;	zipco
 90590;	542;	Herr;	Inhaber R.;	Wiese;	;	Das Perfekte Haus;	Client-ID 515217;	Essen, Kastanienallee 81;	30127;	Hannover;	Germany;	;	;	;	;	515217@kkemail.example.org;	billing
 90629;	132;	;	Ragnar;	Richter;	;	;	;	;	;	;	;	;	;	;	;	mail@ragnar-richter..example.org;	contractual,subscriber:members-announce,subscriber:members-discussion,subscriber:generalversammlung
 90677;	132;	;	Eike;	Henning;	;	;	;	;	;	;	;	;	;	;	;	hostsharing@eike-henning..example.org;	operation,subscriber:operations-announce,subscriber:operations-discussion
-90698;	132;	;	Jan;	Henning;	;	;	;	;	;	;	;	;	01577 12345678;	;	;	mail@jan-henning.example.org;	operation
+
+# deliberately duplicate person with different contact-data
+90698;	132;	;	Jan;	Henning;	;	;	;	;	;	;	;	;	01577 12345678;	;	;	mail@jan-henning.example.org;	operation
+90699;	132;	;	Jan;	Henning;	;	;	;	;	;	;	;	;	01577 12345678;	;	;	lists@jan-henning.example.org;	subscriber:operations-announce