Report an error when trying to create diffs for data too old for the server

lonvia · lonvia · commit abf3cc5fee16 · 2025-10-05T22:19:23.000+02:00
diff --git a/src/osmium/replication/server.py b/src/osmium/replication/server.py
@@ -305,16 +305,24 @@ def apply_diffs_to_file(self, infile: str, outfile: str, start_id: int,
         return (diffs.id, diffs.newest)
 
     def timestamp_to_sequence(self, timestamp: dt.datetime,
-                              balanced_search: bool = False) -> Optional[int]:
+                              balanced_search: bool = False,
+                              limit_by_oldest_available: bool = False) -> Optional[int]:
         """ Get the sequence number of the replication file that contains the
             given timestamp. The search algorithm is optimised for replication
             servers that publish updates in regular intervals. For servers
             with irregular change file publication dates 'balanced_search`
             should be set to true so that a standard binary search for the
             sequence will be used. The default is good for all known
             OSM replication services.
-        """
 
+            When `limit_by_oldest_available` is set, the function returns None
+            if the server's replication does not start at sequence 0 and the
+            given timestamp is older than the oldest timestamp available on
+            the server. Some replication servers do not keep the full history,
+            and this flag avoids accidentally trying to download older data.
+            The downside is that the function will never return the oldest
+            available sequence ID when the flag is set.
+        """
         # get the current timestamp from the server
         upper = self.get_state_info()
 
@@ -331,8 +339,10 @@ def timestamp_to_sequence(self, timestamp: dt.datetime,
             lower = self.get_state_info(lowerid)
 
             if lower is not None and lower.timestamp >= timestamp:
-                if lower.sequence == 0 or lower.sequence + 1 >= upper.sequence:
-                    return lower.sequence
+                if lower.sequence == 0:
+                    return 0
+                if lower.sequence + 1 >= upper.sequence:
+                    return None if limit_by_oldest_available else lower.sequence
                 upper = lower
                 lower = None
                 lowerid = 0
diff --git a/src/osmium/tools/common.py b/src/osmium/tools/common.py
@@ -33,7 +33,7 @@ def get_sequence(self, svr: ReplicationServer) -> Optional[int]:
 
         assert self.date is not None
         log.debug("Looking up sequence ID for timestamp %s" % self.date)
-        return svr.timestamp_to_sequence(self.date)
+        return svr.timestamp_to_sequence(self.date, limit_by_oldest_available=True)
 
     def get_end_sequence(self, svr: ReplicationServer) -> Optional[int]:
         if self.seq_id is not None:
diff --git a/test/test_pyosmium_up-to-date.py b/test/test_pyosmium_up-to-date.py
@@ -168,3 +168,9 @@ def test_update_with_enddate(test_data, runner, tmp_path):
     osmium.apply(newfile, ids)
 
     assert ids.relations == list(range(101, 106))
+
+
+def test_change_date_too_old_for_replication_source(test_data, runner):
+    outfile = test_data("n1 v1 t2070-04-05T06:30:00Z")
+
+    assert 3 == runner(outfile)