@@ -123,15 +123,45 @@ def check_for_duplicate_packages(self, packages_yml):
123123
124124 This method is called only during `dbt deps --add-package` to check if the package
125125 being added already exists in packages.yml. It uses substring matching to identify
126- duplicates, checking if the package name appears within package identifiers (such as
127- within git URLs, hub package names, or local paths).
126+ duplicates, which means it will match across different package sources. For example,
127+ adding a hub package "dbt-labs/dbt_utils" will remove an existing git package
128+ "https://github.com/dbt-labs/dbt-utils.git" since both contain "dbt_utils" or "dbt-utils".
129+
130+ The matching is flexible to handle both underscore and hyphen variants of package names,
131+ as git repos often use hyphens (dbt-utils) while package names use underscores (dbt_utils).
132+ Word boundaries (/, .) are enforced to prevent false matches like "dbt-core" matching
133+ "dbt-core-utils".
128134
129135 Args:
130136 packages_yml (dict): In-memory read of `packages.yml` contents
131137
132138 Returns:
133139 dict: Updated packages_yml contents with matching packages removed
134140 """
141+ # Extract the package name for matching
142+ package_name = self .args .add_package ["name" ]
143+
144+ # Create variants for flexible matching (handle _ vs -)
145+ # Check multiple variants to handle naming inconsistencies between hub and git
146+ package_name_parts = [
147+ package_name , # Original: "dbt-labs/dbt_utils"
148+ package_name .replace ("_" , "-" ), # Hyphens: "dbt-labs/dbt-utils"
149+ package_name .replace ("-" , "_" ), # Underscores: "dbt_labs/dbt_utils"
150+ ]
151+ # Extract just the package name without org (after last /)
152+ if "/" in package_name :
153+ short_name = package_name .split ("/" )[- 1 ]
154+ package_name_parts .extend (
155+ [
156+ short_name , # "dbt_utils"
157+ short_name .replace ("_" , "-" ), # "dbt-utils"
158+ short_name .replace ("-" , "_" ), # "dbt_utils" (deduplicated)
159+ ]
160+ )
161+
162+ # Remove duplicates from package_name_parts
163+ package_name_parts = list (set (package_name_parts ))
164+
135165 # Iterate backwards to safely delete items without index shifting issues
136166 for i in range (len (packages_yml ["packages" ]) - 1 , - 1 , - 1 ):
137167 pkg_entry = packages_yml ["packages" ][i ]
@@ -146,19 +176,40 @@ def check_for_duplicate_packages(self, packages_yml):
146176 or pkg_entry .get ("private" ) # private package
147177 )
148178
149- # Check if package name appears in the identifier using substring match
150- if package_identifier and self .args .add_package ["name" ] in package_identifier :
151- del packages_yml ["packages" ][i ]
152- # Filter out non-string values (like warn-unpinned boolean) before logging
153- # Note: Check for bool first since bool is a subclass of int in Python
154- loggable_package = {
155- k : v
156- for k , v in pkg_entry .items ()
157- if not isinstance (v , bool )
158- and isinstance (v , (str , int , float ))
159- and k != "unrendered"
160- }
161- fire_event (DepsFoundDuplicatePackage (removed_package = loggable_package ))
179+ # Check if any variant of the package name appears in the identifier
180+ # Use word boundaries to avoid false matches (e.g., "dbt-core" shouldn't match "dbt-core-utils")
181+ # Word boundaries are: start/end of string, /, or .
182+ # Note: - and _ are NOT boundaries since they're used within compound package names
183+ if package_identifier :
184+ is_duplicate = False
185+ for name_variant in package_name_parts :
186+ if name_variant in package_identifier :
187+ # Found a match, now verify it's not a substring of a larger word
188+ # Check characters before and after the match
189+ idx = package_identifier .find (name_variant )
190+ start_ok = idx == 0 or package_identifier [idx - 1 ] in "/."
191+ end_idx = idx + len (name_variant )
192+ end_ok = (
193+ end_idx == len (package_identifier )
194+ or package_identifier [end_idx ] in "/."
195+ )
196+
197+ if start_ok and end_ok :
198+ is_duplicate = True
199+ break
200+
201+ if is_duplicate :
202+ del packages_yml ["packages" ][i ]
203+ # Filter out non-string values (like warn-unpinned boolean) before logging
204+ # Note: Check for bool first since bool is a subclass of int in Python
205+ loggable_package = {
206+ k : v
207+ for k , v in pkg_entry .items ()
208+ if not isinstance (v , bool )
209+ and isinstance (v , (str , int , float ))
210+ and k != "unrendered"
211+ }
212+ fire_event (DepsFoundDuplicatePackage (removed_package = loggable_package ))
162213
163214 return packages_yml
164215
0 commit comments